From f816fa06b3ac12044eab4d3cb93fc7796aa0769f Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 11 Oct 2024 00:25:35 -0400 Subject: [PATCH 01/33] [wip] implement part 4 job management - job creation and results trigger (relates to https://github.com/opengeospatial/ogcapi-processes/pull/437, relates to https://github.com/opengeospatial/ogcapi-processes/issues/419, relates to https://github.com/crim-ca/weaver/issues/716) --- CHANGES.rst | 3 +- tests/test_utils.py | 46 +++--- weaver/processes/execution.py | 76 ++++++--- weaver/processes/utils.py | 3 - weaver/status.py | 83 ++++++++-- weaver/typedefs.py | 1 + weaver/utils.py | 39 +++++ weaver/wps/service.py | 14 +- weaver/wps_restapi/api.py | 13 ++ .../examples/job_status_created.json | 7 + weaver/wps_restapi/jobs/jobs.py | 152 +++++++++++++++++- weaver/wps_restapi/jobs/utils.py | 89 +++++++--- weaver/wps_restapi/processes/processes.py | 31 +--- weaver/wps_restapi/providers/providers.py | 24 ++- weaver/wps_restapi/providers/utils.py | 14 +- weaver/wps_restapi/swagger_definitions.py | 85 +++++++--- weaver/xml_util.py | 2 +- 17 files changed, 524 insertions(+), 158 deletions(-) create mode 100644 weaver/wps_restapi/examples/job_status_created.json diff --git a/CHANGES.rst b/CHANGES.rst index 281639ae9..dda197b28 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -12,7 +12,8 @@ Changes Changes: -------- -- No change. +- Add support of *OGC API - Processes - Part 4: Job Management* endpoints for `Job` creation and execution + (fixes `#716 <https://github.com/crim-ca/weaver/issues/716>`_). 
Fixes: ------ diff --git a/tests/test_utils.py b/tests/test_utils.py index 9d086a845..9a92c40d8 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -111,6 +111,8 @@ AWS_S3_REGION_SUBSET_WITH_MOCK = {MOCK_AWS_REGION} | AWS_S3_REGION_SUBSET AWS_S3_REGION_NON_DEFAULT = list(AWS_S3_REGION_SUBSET_WITH_MOCK - {MOCK_AWS_REGION})[0] +KNOWN_STATUSES = set(Status.values()) - {Status.UNKNOWN} + # pylint: disable=R1732,W1514 # not using with open + encoding @@ -391,32 +393,24 @@ def test_pass_http_error_raises_other_error_with_multi_pyramid_error(): pass_http_error(ex, [HTTPConflict, HTTPInternalServerError]) -def get_status_variations(status_value): - return [status_value.lower(), - status_value.upper(), - status_value.capitalize(), - f"Process{status_value.capitalize()}"] - - -def test_map_status_ogc_compliant(): - known_statuses = set(Status.values()) - {Status.UNKNOWN} - for sv in known_statuses: - for s in get_status_variations(sv): - assert map_status(s, StatusCompliant.OGC) in JOB_STATUS_CATEGORIES[StatusCompliant.OGC] - - -def test_map_status_pywps_compliant(): - known_statuses = set(Status.values()) - {Status.UNKNOWN} - for sv in known_statuses: - for s in get_status_variations(sv): - assert map_status(s, StatusCompliant.PYWPS) in JOB_STATUS_CATEGORIES[StatusCompliant.PYWPS] - - -def test_map_status_owslib_compliant(): - known_statuses = set(Status.values()) - {Status.UNKNOWN} - for sv in known_statuses: - for s in get_status_variations(sv): - assert map_status(s, StatusCompliant.OWSLIB) in JOB_STATUS_CATEGORIES[StatusCompliant.OWSLIB] +@pytest.mark.parametrize( + ["compliance", "status"], + itertools.product( + list(StatusCompliant), + itertools.chain.from_iterable( + [ + status.lower(), + status.upper(), + status.capitalize(), + f"Process{status.capitalize()}" + ] + for status in KNOWN_STATUSES + ) + ) +) +def test_map_status_compliant(compliance, status): + # type: (StatusCompliant, str) -> None + assert map_status(status, compliance) in 
JOB_STATUS_CATEGORIES[compliance] def test_map_status_back_compatibility_and_special_cases(): diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index e63f9bd20..c3cd767e8 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -12,6 +12,7 @@ from owslib.wps import BoundingBoxDataInput, ComplexDataInput from pyramid.httpexceptions import HTTPBadRequest, HTTPNotAcceptable from pyramid_celery import celery_app as app +from werkzeug.wrappers.request import Request as WerkzeugRequest from weaver.database import get_db from weaver.datatype import Process, Service @@ -34,10 +35,12 @@ from weaver.utils import ( apply_number_with_unit, as_int, + extend_instance, fully_qualified_name, get_any_id, get_any_value, get_header, + get_path_kvp, get_registry, get_settings, now, @@ -48,6 +51,7 @@ wait_secs ) from weaver.visibility import Visibility +from weaver.wps.service import get_pywps_service from weaver.wps.utils import ( check_wps_status, get_wps_client, @@ -56,6 +60,7 @@ get_wps_output_dir, get_wps_output_path, get_wps_output_url, + get_wps_path, load_pywps_config ) from weaver.wps_restapi import swagger_definitions as sd @@ -79,6 +84,7 @@ AnyProcessRef, AnyResponseType, AnyServiceRef, + AnyViewResponse, AnyValueType, CeleryResult, HeaderCookiesType, @@ -116,8 +122,6 @@ def execute_process(task, job_id, wps_url, headers=None): """ Celery task that executes the WPS process job monitoring as status updates (local and remote). 
""" - from weaver.wps.service import get_pywps_service - LOGGER.debug("Job execute process called.") task_process = get_celery_process() @@ -660,8 +664,30 @@ def map_locations(job, settings): os.symlink(wps_ref, job_ref) -def submit_job(request, reference, tags=None): - # type: (Request, Union[Service, Process], Optional[List[str]]) -> AnyResponseType +def submit_job_dispatch_wps(request, process): + # type: (Request, Process) -> AnyViewResponse + """ + Dispatch a :term:`XML` request to the relevant :term:`Process` handler using the :term:`WPS` endpoint. + + Sends the :term:`XML` request to the :term:`WPS` endpoint which knows how to parse it properly. + Execution will end up in the same :func:`submit_job_handler` function as for :term:`OGC API` :term:`JSON` execution. + + .. warning:: + The function assumes that :term:`XML` was pre-validated as present in the :paramref:`request`. + """ + service = get_pywps_service() + wps_params = {"version": "1.0.0", "request": "Execute", "service": "WPS", "identifier": process.id} + request.path_info = get_wps_path(request) + request.query_string = get_path_kvp("", **wps_params)[1:] + location = request.application_url + request.path_info + request.query_string + LOGGER.warning("Route redirection [%s] -> [%s] for WPS-XML support.", request.url, location) + http_request = extend_instance(request, WerkzeugRequest) + http_request.shallow = False + return service.call(http_request) + + +def submit_job(request, reference, tags=None, process_id=None): + # type: (Request, Union[Service, Process], Optional[List[str]], Optional[str]) -> AnyResponseType """ Generates the job submission from details retrieved in the request. 
@@ -683,13 +709,13 @@ def submit_job(request, reference, tags=None): # validate context if needed later on by the job for early failure context = get_wps_output_context(request) - provider_id = None # None OK if local - process_id = None # None OK if remote, but can be found as well if available from WPS-REST path # noqa + prov_id = None # None OK if local + proc_id = None # None OK if remote, but can be found as well if available from WPS-REST path # noqa tags = tags or [] lang = request.accept_language.header_value # can only preemptively check if local process if isinstance(reference, Process): service_url = reference.processEndpointWPS1 - process_id = reference.identifier # explicit 'id:version' process revision if available, otherwise simply 'id' + proc_id = reference.identifier # explicit 'id:version' process revision if available, otherwise simply 'id' visibility = reference.visibility is_workflow = reference.type == ProcessType.WORKFLOW is_local = True @@ -702,8 +728,8 @@ def submit_job(request, reference, tags=None): lang = matched_lang elif isinstance(reference, Service): service_url = reference.url - provider_id = reference.id - process_id = resolve_process_tag(request) + prov_id = reference.id + proc_id = process_id or resolve_process_tag(request) visibility = Visibility.PUBLIC is_workflow = False is_local = False @@ -716,7 +742,7 @@ def submit_job(request, reference, tags=None): user = request.authenticated_userid # FIXME: consider other methods to provide the user headers = dict(request.headers) settings = get_settings(request) - return submit_job_handler(json_body, settings, service_url, provider_id, process_id, is_workflow, is_local, + return submit_job_handler(json_body, settings, service_url, prov_id, proc_id, is_workflow, is_local, visibility, language=lang, headers=headers, tags=tags, user=user, context=context) @@ -787,28 +813,38 @@ def submit_job_handler(payload, # type: ProcessExecution exec_resp = json_body.get("response") subscribers = 
map_job_subscribers(json_body, settings) + job_pending_created = payload.get("status") == "create" + if job_pending_created: + job_status = Status.CREATED + job_message = "Job created with pending trigger." + else: + job_status = Status.ACCEPTED + job_message = "Job task submitted for execution." + store = db.get_store(StoreJobs) # type: StoreJobs - job = store.save_job(task_id=Status.ACCEPTED, process=process, service=provider_id, + job = store.save_job(task_id=job_status, process=process, service=provider_id, inputs=json_body.get("inputs"), outputs=json_body.get("outputs"), is_local=is_local, is_workflow=is_workflow, access=visibility, user_id=user, context=context, execute_async=is_execute_async, execute_response=exec_resp, custom_tags=tags, accept_language=language, subscribers=subscribers) - job.save_log(logger=LOGGER, message="Job task submitted for execution.", status=Status.ACCEPTED, progress=0) + job.save_log(logger=LOGGER, message=job_message, status=job_status, progress=0) job = store.update_job(job) location_url = job.status_url(settings) resp_headers = {"Location": location_url} resp_headers.update(applied) - wps_url = clean_ows_url(service_url) - result = execute_process.delay(job_id=job.id, wps_url=wps_url, headers=headers) # type: CeleryResult - LOGGER.debug("Celery pending task [%s] for job [%s].", result.id, job.id) - if not is_execute_async: + task_result = None # type: Optional[CeleryResult] + if not job_pending_created: + wps_url = clean_ows_url(service_url) + task_result = execute_process.delay(job_id=job.id, wps_url=wps_url, headers=headers) + LOGGER.debug("Celery pending task [%s] for job [%s].", task_result.id, job.id) + if not job_pending_created and not is_execute_async: LOGGER.debug("Celery task requested as sync if it completes before (wait=%ss)", wait) try: - result.wait(timeout=wait) + task_result.wait(timeout=wait) except CeleryTaskTimeoutError: pass - if result.ready(): + if task_result.ready(): job = store.fetch_by_id(job.id) # 
when sync is successful, it must return the results direct instead of status info # see: https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execute_response @@ -833,8 +869,8 @@ def submit_job_handler(payload, # type: ProcessExecution "jobID": job.id, "processID": job.process, "providerID": provider_id, # dropped by validator if not applicable - "status": map_status(Status.ACCEPTED), - "location": location_url + "status": map_status(job_status), + "location": location_url, # for convenience/backward compatibility, but official is Location *header* } resp = get_job_submission_response(body, resp_headers) return resp diff --git a/weaver/processes/utils.py b/weaver/processes/utils.py index 331df523f..610881d95 100644 --- a/weaver/processes/utils.py +++ b/weaver/processes/utils.py @@ -107,9 +107,6 @@ UpdateFields = List[Union[str, UpdateFieldListMethod]] -# FIXME: -# https://github.com/crim-ca/weaver/issues/215 -# define common Exception classes that won't require this type of conversion def get_process(process_id=None, request=None, settings=None, store=None, revision=True): # type: (Optional[str], Optional[PyramidRequest], Optional[SettingsType], Optional[StoreProcesses], bool) -> Process """ diff --git a/weaver/status.py b/weaver/status.py index 45042655f..5016cfc99 100644 --- a/weaver/status.py +++ b/weaver/status.py @@ -9,22 +9,29 @@ class StatusCompliant(ExtendedEnum): OGC = "OGC" PYWPS = "PYWPS" OWSLIB = "OWSLIB" + OPENEO = "OPENEO" class StatusCategory(ExtendedEnum): FINISHED = "FINISHED" RUNNING = "RUNNING" + PENDING = "PENDING" FAILED = "FAILED" class Status(Constants): + CREATED = "created" + QUEUED = "queued" ACCEPTED = "accepted" STARTED = "started" PAUSED = "paused" SUCCEEDED = "succeeded" SUCCESSFUL = "successful" FAILED = "failed" + ERROR = "error" + FINISHED = "finished" RUNNING = "running" + CANCELED = "canceled" DISMISSED = "dismissed" EXCEPTION = "exception" UNKNOWN = "unknown" # don't include in any below collections @@ -33,14 +40,16 @@ class 
Status(Constants): JOB_STATUS_CATEGORIES = { # note: # OGC compliant (old): [Accepted, Running, Succeeded, Failed] - # OGC compliant (new): [accepted, running, successful, failed, dismissed] + # OGC compliant (new): [accepted, running, successful, failed, dismissed, created] ('created' in Part 4 only) # PyWPS uses: [Accepted, Started, Succeeded, Failed, Paused, Exception] - # OWSLib users: [Accepted, Running, Succeeded, Failed, Paused] (with 'Process' in front) + # OWSLib uses: [Accepted, Running, Succeeded, Failed, Paused] (with 'Process' in front) + # OpenEO uses: [queued, running, finished, error, canceled, created] # https://github.com/opengeospatial/ogcapi-processes/blob/master/openapi/schemas/processes-core/statusCode.yaml # http://docs.opengeospatial.org/is/14-065/14-065.html#17 # corresponding statuses are aligned vertically for 'COMPLIANT' groups StatusCompliant.OGC: frozenset([ + Status.CREATED, # Part 4: Job Management Status.ACCEPTED, Status.RUNNING, Status.SUCCEEDED, # old (keep it because it matches existing ADES/EMS and other providers) @@ -63,31 +72,50 @@ class Status(Constants): Status.FAILED, Status.PAUSED ]), + StatusCompliant.OPENEO: frozenset([ + Status.CREATED, + Status.QUEUED, + Status.RUNNING, + Status.FINISHED, + Status.ERROR, + Status.CANCELED + ]), # utility categories StatusCategory.RUNNING: frozenset([ Status.ACCEPTED, Status.RUNNING, Status.STARTED, + Status.QUEUED, + Status.PAUSED + ]), + StatusCategory.PENDING: frozenset([ + Status.CREATED, + Status.ACCEPTED, + Status.QUEUED, Status.PAUSED ]), StatusCategory.FINISHED: frozenset([ Status.FAILED, Status.DISMISSED, + Status.CANCELED, Status.EXCEPTION, + Status.ERROR, Status.SUCCEEDED, - Status.SUCCESSFUL + Status.SUCCESSFUL, + Status.FINISHED ]), StatusCategory.FAILED: frozenset([ Status.FAILED, Status.DISMISSED, - Status.EXCEPTION + Status.EXCEPTION, + Status.ERROR ]), } # FIXME: see below detail in map_status about 'successful', partially compliant to OGC statuses # 
https://github.com/opengeospatial/ogcapi-processes/blob/ca8e90/core/openapi/schemas/statusCode.yaml JOB_STATUS_CODE_API = JOB_STATUS_CATEGORIES[StatusCompliant.OGC] - {Status.SUCCESSFUL} -JOB_STATUS_SEARCH_API = set(list(JOB_STATUS_CODE_API) + [StatusCategory.FINISHED.value.lower()]) +JOB_STATUS_SEARCH_API = set(list(JOB_STATUS_CODE_API) + [Status.FINISHED]) # id -> str STATUS_PYWPS_MAP = {s: _WPS_STATUS._fields[s].lower() for s in range(len(WPS_STATUS))} @@ -100,14 +128,19 @@ class Status(Constants): from weaver.typedefs import Literal, TypeAlias StatusType: Status = Literal[ + Status.CREATED, Status.ACCEPTED, Status.STARTED, + Status.QUEUED, Status.PAUSED, Status.SUCCEEDED, + Status.FINISHED, Status.FAILED, Status.RUNNING, Status.DISMISSED, + Status.CANCELED, Status.EXCEPTION, + Status.ERROR, Status.UNKNOWN ] AnyStatusType = Union[Status, StatusType, int] @@ -116,6 +149,7 @@ class Status(Constants): StatusCategory, Literal[ StatusCategory.RUNNING, + StatusCategory.PENDING, StatusCategory.FINISHED, StatusCategory.FAILED, ], @@ -160,21 +194,48 @@ def map_status(wps_status, compliant=StatusCompliant.OGC): if job_status in JOB_STATUS_CATEGORIES[StatusCategory.RUNNING]: if job_status in [Status.STARTED, Status.PAUSED]: job_status = Status.RUNNING + elif job_status == Status.QUEUED: + job_status = Status.ACCEPTED + elif job_status in [Status.CANCELED, Status.DISMISSED]: + job_status = Status.DISMISSED elif job_status in JOB_STATUS_CATEGORIES[StatusCategory.FAILED]: - if job_status not in [Status.FAILED, Status.DISMISSED]: - job_status = Status.FAILED + job_status = Status.FAILED + elif job_status == Status.FINISHED: + job_status = Status.SUCCEEDED elif compliant == StatusCompliant.PYWPS: - if job_status == Status.RUNNING: + if job_status in Status.RUNNING: job_status = Status.STARTED - elif job_status == Status.DISMISSED: + elif job_status in [Status.DISMISSED, Status.CANCELED]: job_status = Status.FAILED + elif job_status in 
JOB_STATUS_CATEGORIES[StatusCategory.FAILED]: + job_status = Status.EXCEPTION + elif job_status in JOB_STATUS_CATEGORIES[StatusCategory.PENDING]: + job_status = Status.PAUSED + elif job_status in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]: + job_status = Status.SUCCEEDED elif compliant == StatusCompliant.OWSLIB: - if job_status == Status.STARTED: + if job_status in JOB_STATUS_CATEGORIES[StatusCategory.PENDING]: + job_status = Status.PAUSED + elif job_status in JOB_STATUS_CATEGORIES[StatusCategory.RUNNING]: job_status = Status.RUNNING - elif job_status in JOB_STATUS_CATEGORIES[StatusCategory.FAILED] and job_status != Status.FAILED: + elif job_status in JOB_STATUS_CATEGORIES[StatusCategory.FAILED]: job_status = Status.FAILED + elif job_status in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]: + job_status = Status.SUCCEEDED + + elif compliant == StatusCompliant.OPENEO: + if job_status in JOB_STATUS_CATEGORIES[StatusCategory.PENDING]: + job_status = Status.QUEUED + elif job_status == Status.DISMISSED: + job_status = Status.CANCELED + elif job_status in JOB_STATUS_CATEGORIES[StatusCategory.RUNNING]: + job_status = Status.RUNNING + elif job_status in JOB_STATUS_CATEGORIES[StatusCategory.FAILED]: + job_status = Status.ERROR + elif job_status in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]: + job_status = Status.FINISHED # FIXME: new official status is 'successful', but this breaks everywhere (tests, local/remote execute, etc.) 
# https://github.com/opengeospatial/ogcapi-processes/blob/master/openapi/schemas/processes-core/statusCode.yaml diff --git a/weaver/typedefs.py b/weaver/typedefs.py index ae77096d5..a89521090 100644 --- a/weaver/typedefs.py +++ b/weaver/typedefs.py @@ -959,6 +959,7 @@ class CWL_SchemaName(Protocol): }, total=True) ProcessExecution = TypedDict("ProcessExecution", { + "status": NotRequired[Literal["create"]], "mode": NotRequired[AnyExecuteMode], "response": NotRequired[AnyExecuteResponse], "inputs": NotRequired[ExecutionInputs], diff --git a/weaver/utils.py b/weaver/utils.py index 7f98b996c..effa9c3ae 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -120,6 +120,7 @@ JSON, KVP, KVP_Item, + Link, Literal, Number, OpenAPISchema, @@ -1385,6 +1386,44 @@ def get_href_headers(path, # type: str return headers +def make_link_header( + href, # type: Union[str, Link] + hreflang=None, # type: Optional[str] + rel=None, # type: Optional[str] + type=None, # type: Optional[str] # noqa + title=None, # type: Optional[str] + charset=None, # type: Optional[str] +): # type: (...) -> str + """ + Creates the HTTP Link (:rfc:`8288`) header value from input parameters or a dictionary representation. + + Parameter names are specifically selected to allow direct unpacking from the dictionary representation. + Otherwise, a dictionary can be passed as the first parameter, allowing other parameters to act as override values. + Alternatively, all parameters can be supplied individually. + + .. note:: + Parameter :paramref:`rel` is optional to allow unpacking with a single parameter, + but its value is required to form a valid ``Link`` header. 
+ """ + if isinstance(href, dict): + rel = rel or href.get("rel") + type = type or href.get("type") # noqa + title = title or href.get("title") + charset = charset or href.get("charset") # noqa + hreflang = hreflang or href.get("hreflang") + href = href["href"] + link = f"<{href}>; rel=\"{rel}\"" + if type: + link += f"; type=\"{type}\"" + if charset: + link += f"; charset=\"{charset}\"" + if title: + link += f"; title=\"{title}\"" + if hreflang: + link += f"; hreflang={hreflang}" + return link + + def get_base_url(url): # type: (str) -> str """ diff --git a/weaver/wps/service.py b/weaver/wps/service.py index 84359d510..192d53ddd 100644 --- a/weaver/wps/service.py +++ b/weaver/wps/service.py @@ -258,15 +258,17 @@ def prepare_process_for_execution(self, identifier): def execute(self, identifier, wps_request, uuid): # type: (str, Union[WPSRequest, WorkerRequest], str) -> Union[WPSResponse, HTTPValid] """ - Handles the ``Execute`` KVP/XML request submitted on the WPS endpoint. + Handles the ``Execute`` :term:`KVP`/:term:`XML` request submitted on the :term:`WPS` endpoint. - Submit WPS request to corresponding WPS-REST endpoint and convert back for requested ``Accept`` content-type. + Submit :term:`WPS` request to corresponding :term:`WPS-REST` endpoint and convert back for + requested ``Accept`` content-type. - Overrides the original execute operation, that will instead be handled by :meth:`execute_job` following - callback from Celery Worker, which handles process job creation and monitoring. + Overrides the original execute operation, that will instead be handled by :meth:`execute_job` + following callback from :mod:`celery` worker, which handles :term:`Job` creation and monitoring. - If ``Accept`` is JSON, the result is directly returned from :meth:`_submit_job`. - If ``Accept`` is XML or undefined, :class:`WorkerExecuteResponse` converts the received JSON with XML template. + If ``Accept`` is :term:`JSON`, the result is directly returned from :meth:`_submit_job`. 
+ If ``Accept`` is :term:`XML` or undefined, :class:`WorkerExecuteResponse` converts the + received :term:`JSON` with :term:`XML` template. """ result = self._submit_job(wps_request) if not isinstance(result, dict): diff --git a/weaver/wps_restapi/api.py b/weaver/wps_restapi/api.py index a736df12b..edb386710 100644 --- a/weaver/wps_restapi/api.py +++ b/weaver/wps_restapi/api.py @@ -97,6 +97,7 @@ def get_conformance(category, settings): ogcapi_proc_core = "http://www.opengis.net/spec/ogcapi-processes-1/1.0" ogcapi_proc_part2 = "http://www.opengis.net/spec/ogcapi-processes-2/1.0" ogcapi_proc_part3 = "http://www.opengis.net/spec/ogcapi-processes-3/0.0" + ogcapi_proc_part4 = "http://www.opengis.net/spec/ogcapi-processes-4/1.0" ogcapi_proc_apppkg = "http://www.opengis.net/spec/eoap-bp/1.0" # FIXME: https://github.com/crim-ca/weaver/issues/412 # ogcapi_proc_part3 = "http://www.opengis.net/spec/ogcapi-processes-3/1.0" @@ -475,6 +476,18 @@ def get_conformance(category, settings): # FIXME: support openEO processes (https://github.com/crim-ca/weaver/issues/564) # f"{ogcapi_proc_part3}/conf/openeo-workflows", # f"{ogcapi_proc_part3}/req/openeo-workflows", + f"{ogcapi_proc_part4}/conf/jm/create/post-op", + f"{ogcapi_proc_part4}/per/job-management/additional-status-codes", # see 'weaver.status.map_status' + f"{ogcapi_proc_part4}/per/job-management/create-body", # Weaver has XML for WPS + f"{ogcapi_proc_part4}/per/job-management/create-content-schema", + f"{ogcapi_proc_part4}/per/job-management/update-body", + f"{ogcapi_proc_part4}/per/job-management/update-content-schema", + # FIXME: support part 3: Nested Workflow Execution request (https://github.com/crim-ca/weaver/issues/412) + # f"{ogcapi_proc_part4}/rec/job-management/create-body-ogcapi-processes", + # FIXME: support openEO processes (https://github.com/crim-ca/weaver/issues/564) + # f"{ogcapi_proc_part4}/rec/job-management/create-body-openeo", + f"{ogcapi_proc_part4}/req/job-management/create/post-op", + 
f"{ogcapi_proc_part4}/req/job-management/update/response-locked", # FIXME: employ 'weaver.wps_restapi.quotation.utils.check_quotation_supported' to add below conditionally # FIXME: https://github.com/crim-ca/weaver/issues/156 (billing/quotation) # https://github.com/opengeospatial/ogcapi-processes/tree/master/extensions/billing diff --git a/weaver/wps_restapi/examples/job_status_created.json b/weaver/wps_restapi/examples/job_status_created.json new file mode 100644 index 000000000..bec621212 --- /dev/null +++ b/weaver/wps_restapi/examples/job_status_created.json @@ -0,0 +1,7 @@ +{ + "description": "Job successfully submitted for creation. Waiting on trigger request to being execution.", + "jobID": "797c0c5e-9bc2-4bf3-ab73-5f3df32044a8", + "processID": "Echo", + "status": "created", + "location": "http://schema-example.com/processes/Echo/jobs/797c0c5e-9bc2-4bf3-ab73-5f3df32044a8" +} diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index 749a338b1..9193903f4 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -3,18 +3,33 @@ from box import Box from celery.utils.log import get_task_logger from colander import Invalid -from pyramid.httpexceptions import HTTPBadRequest, HTTPOk, HTTPPermanentRedirect, HTTPUnprocessableEntity +from pyramid.httpexceptions import ( + HTTPBadRequest, + HTTPOk, + HTTPPermanentRedirect, + HTTPUnprocessableEntity, + HTTPUnsupportedMediaType +) +from weaver import xml_util from weaver.database import get_db from weaver.datatype import Job -from weaver.exceptions import JobNotFound, JobStatisticsNotFound, log_unhandled_exceptions -from weaver.formats import ContentType, OutputFormat, add_content_type_charset, guess_target_format, repr_json +from weaver.exceptions import JobNotFound, JobStatisticsNotFound, ProcessNotFound, log_unhandled_exceptions +from weaver.formats import ( + ContentType, + OutputFormat, + add_content_type_charset, + clean_media_type_format, + guess_target_format, 
+ repr_json +) from weaver.processes.convert import convert_input_values_schema, convert_output_params_schema +from weaver.processes.execution import submit_job, submit_job_dispatch_wps, submit_job_handler from weaver.processes.utils import get_process from weaver.processes.wps_package import mask_process_inputs from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory from weaver.store.base import StoreJobs -from weaver.utils import get_settings +from weaver.utils import get_header, get_settings from weaver.wps_restapi import swagger_definitions as sd from weaver.wps_restapi.jobs.utils import ( dismiss_job_task, @@ -23,10 +38,12 @@ get_job_results_response, get_results, get_schema_query, - raise_job_bad_status, + raise_job_bad_status_locked, + raise_job_bad_status_success, raise_job_dismissed, validate_service_process ) +from weaver.wps_restapi.providers.utils import get_service from weaver.wps_restapi.swagger_definitions import datetime_interval_parser if TYPE_CHECKING: @@ -179,6 +196,96 @@ def _job_list(_jobs): # type: (Iterable[Job]) -> List[JSON] return Box(body) +@sd.jobs_service.post( + tags=[sd.TAG_EXECUTE, sd.TAG_JOBS], + content_type=list(ContentType.ANY_XML), + schema=sd.PostJobsEndpointXML(), + accept=ContentType.APP_JSON, + renderer=OutputFormat.JSON, + response_schemas=sd.post_jobs_responses, +) +@sd.jobs_service.post( + tags=[sd.TAG_EXECUTE, sd.TAG_JOBS, sd.TAG_PROCESSES], + content_type=ContentType.APP_JSON, + schema=sd.PostJobsEndpointJSON(), + accept=ContentType.APP_JSON, + renderer=OutputFormat.JSON, + response_schemas=sd.post_jobs_responses, +) +def create_job(request): + # type: (PyramidRequest) -> AnyViewResponse + """ + Create a new processing job with advanced management and execution capabilities. 
+ """ + proc_id = None + prov_id = None + try: + ctype = get_header("Content-Type", request.headers, default=ContentType.APP_JSON) + ctype = clean_media_type_format(ctype, strip_parameters=True) + if ctype == ContentType.APP_JSON and "process" in request.json_body: + proc_url = request.json_body["process"] + proc_url = sd.ProcessURL().deserialize(proc_url) + prov_url, proc_id = proc_url.rsplit("/processes/", 1) + prov_parts = prov_url.rsplit("/providers/", 1) + prov_id = prov_parts[-1] if len(prov_parts) > 1 else None + elif ctype in ContentType.ANY_XML: + body_xml = xml_util.fromstring(request.text) + proc_id = body_xml.xpath("ows:Identifier", namespaces=body_xml.getroot().nsmap)[0].text + except Exception as exc: + raise ProcessNotFound(json={ + "title": "NoSuchProcess", + "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-1/1.0/no-such-process", + "detail": "Process URL or identifier reference missing or invalid.", + "status": ProcessNotFound.code, + }) from exc + if not proc_id: + raise HTTPUnsupportedMediaType(json={ + "title": "Unsupported Media Type", + "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-2/1.0/unsupported-media-type", + "detail": "Process URL or identifier reference missing or invalid.", + "status": HTTPUnsupportedMediaType.code, + "cause": {"headers": {"Content-Type": ctype}}, + }) + + if ctype in ContentType.ANY_XML: + process = get_process(process_id=proc_id) + return submit_job_dispatch_wps(request, process) + + if prov_id: + ref = get_service(request, provider_id=prov_id) + else: + ref = get_process(process_id=proc_id) + proc_id = None # ensure ref is used, process ID needed only for provider + return submit_job(request, ref, process_id=proc_id, tags=["wps-rest", "ogc-api"]) + + +@sd.process_results_service.post( + tags=[sd.TAG_JOBS, sd.TAG_EXECUTE, sd.TAG_RESULTS, sd.TAG_PROCESSES], + schema=sd.ProcessJobResultsTriggerExecutionEndpoint(), + accept=ContentType.APP_JSON, + renderer=OutputFormat.JSON, + 
response_schemas=sd.post_job_results_responses, +) +@sd.job_results_service.post( + tags=[sd.TAG_JOBS, sd.TAG_EXECUTE, sd.TAG_RESULTS], + schema=sd.JobResultsTriggerExecutionEndpoint(), + accept=ContentType.APP_JSON, + renderer=OutputFormat.JSON, + response_schemas=sd.post_job_results_responses, +) +def trigger_job_execution(request): + # type: (PyramidRequest) -> AnyResponseType + """ + Trigger the execution of a previously created job. + """ + job = get_job(request) + raise_job_dismissed(job, request) + raise_job_bad_status_locked(job, request) + # FIXME: reuse job, adjust function or map parameters from attributes + # FIXME: alt 202 code for accepted on async when triggered this way + return submit_job_handler(request, job) + + @sd.provider_job_service.get( tags=[sd.TAG_JOBS, sd.TAG_STATUS, sd.TAG_PROVIDERS], schema=sd.ProviderJobEndpoint(), @@ -211,6 +318,39 @@ def get_job_status(request): return HTTPOk(json=job_status) +@sd.provider_job_service.patch( + tags=[sd.TAG_JOBS, sd.TAG_PROVIDERS], + schema=sd.PatchProviderJobEndpoint(), + accept=ContentType.APP_JSON, + renderer=OutputFormat.JSON, + response_schemas=sd.patch_provider_job_responses, +) +@sd.process_job_service.patch( + tags=[sd.TAG_JOBS, sd.TAG_PROCESSES], + schema=sd.PatchProcessJobEndpoint(), + accept=ContentType.APP_JSON, + renderer=OutputFormat.JSON, + response_schemas=sd.patch_process_job_responses, +) +@sd.job_service.patch( + tags=[sd.TAG_JOBS], + schema=sd.PatchJobEndpoint(), + accept=ContentType.APP_JSON, + renderer=OutputFormat.JSON, + response_schemas=sd.patch_job_responses, +) +def update_job(request): + # type: (PyramidRequest) -> AnyResponseType + """ + Update a previously created job still pending execution. 
+ """ + job = get_job(request) + raise_job_dismissed(job, request) + raise_job_bad_status_locked(job, request) + + raise NotImplementedError # FIXME + + @sd.provider_job_service.delete( tags=[sd.TAG_JOBS, sd.TAG_DISMISS, sd.TAG_PROVIDERS], schema=sd.ProviderJobEndpoint(), @@ -381,7 +521,7 @@ def get_job_outputs(request): """ job = get_job(request) raise_job_dismissed(job, request) - raise_job_bad_status(job, request) + raise_job_bad_status_success(job, request) schema = get_schema_query(request.params.get("schema")) results, _ = get_results(job, request, schema=schema, link_references=False) outputs = {"outputs": results} diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 2f9c4e321..16410746e 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -11,6 +11,7 @@ HTTPCreated, HTTPForbidden, HTTPInternalServerError, + HTTPLocked, HTTPNoContent, HTTPNotFound, HTTPNotImplemented, @@ -48,7 +49,8 @@ get_secure_path, get_settings, get_weaver_url, - is_uuid + is_uuid, + make_link_header ) from weaver.visibility import Visibility from weaver.wps.utils import get_wps_output_dir, get_wps_output_url, map_wps_output_location @@ -461,7 +463,7 @@ def get_job_results_response(job, container, headers=None): :param headers: Additional headers to provide in the response. """ raise_job_dismissed(job, container) - raise_job_bad_status(job, container) + raise_job_bad_status_success(job, container) # when 'response=document', ignore 'transmissionMode=value|reference', respect it when 'response=raw' # See: @@ -580,8 +582,18 @@ def get_job_submission_response(body, headers, error=False): body["description"] = http_desc return http_class(json=body, headerlist=headers) - body["description"] = sd.CreatedLaunchJobResponse.description + if status == Status.CREATED: + body["description"] = ( + "Job successfully submitted for creation. " + "Waiting on trigger request to being execution." 
+ ) + else: + body["description"] = ( + "Job successfully submitted to processing queue. " + "Execution should begin when resources are available." + ) body = sd.CreatedJobStatusSchema().deserialize(body) + headers.setdefault("Location", body["location"]) return HTTPCreated(json=body, headerlist=headers) @@ -663,13 +675,42 @@ def validate_service_process(request): return service_name, process_name -def raise_job_bad_status(job, container=None): +def raise_job_bad_status_locked(job, container=None): + # type: (Job, Optional[AnySettingsContainer]) -> None + """ + Raise the appropriate message for :term:`Job` unable to be modified. + """ + if job.status != Status.CREATED: + links = job.links(container=container) + headers = [("Link", make_link_header(link)) for link in links] + job_reason = "" + if job.status in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]: + job_reason = " It has already finished execution." + elif job.status in JOB_STATUS_CATEGORIES[StatusCategory.PENDING]: + job_reason = " It is already queued for execution." + elif job.status in JOB_STATUS_CATEGORIES[StatusCategory.RUNNING]: + job_reason = " It is already executing." + raise HTTPLocked( + headers=headers, + json={ + "title": "Job Locked for Execution", + "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-4/1.0/locked", + "detail": f"Job cannot be modified.{job_reason}", + "status": HTTPLocked.code, + "cause": {"status": job.status}, + "links": links + } + ) + + +def raise_job_bad_status_success(job, container=None): # type: (Job, Optional[AnySettingsContainer]) -> None """ Raise the appropriate message for :term:`Job` not ready or unable to retrieve output results due to status. 
""" if job.status != Status.SUCCEEDED: links = job.links(container=container) + headers = [("Link", make_link_header(link)) for link in links] if job.status == Status.FAILED: err_code = None err_info = None @@ -697,26 +738,32 @@ def raise_job_bad_status(job, container=None): err_code = OWSNoApplicableCode.code err_info = "unknown" # /req/core/job-results-failed - raise HTTPBadRequest(json={ - "title": "JobResultsFailed", - "type": err_code, - "detail": "Job results not available because execution failed.", - "status": HTTPBadRequest.code, - "cause": err_info, - "links": links - }) + raise HTTPBadRequest( + headers=headers, + json={ + "title": "JobResultsFailed", + "type": err_code, + "detail": "Job results not available because execution failed.", + "status": HTTPBadRequest.code, + "cause": err_info, + "links": links + } + ) # /req/core/job-results-exception/results-not-ready # must use OWS instead of HTTP class to preserve provided JSON body # otherwise, pyramid considers it as not found view/path and rewrites contents in append slash handler - raise OWSNotFound(json={ - "title": "JobResultsNotReady", - "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-1/1.0/result-not-ready", - "detail": "Job is not ready to obtain results.", - "status": HTTPNotFound.code, - "cause": {"status": job.status}, - "links": links - }) + raise OWSNotFound( + headers=headers, + json={ + "title": "JobResultsNotReady", + "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-1/1.0/result-not-ready", + "detail": "Job is not ready to obtain results.", + "status": HTTPNotFound.code, + "cause": {"status": job.status}, + "links": links + } + ) def raise_job_dismissed(job, container=None): @@ -729,7 +776,9 @@ def raise_job_dismissed(job, container=None): settings = get_settings(container) job_links = job.links(settings) job_links = [link for link in job_links if link["rel"] in ["status", "alternate", "collection", "up"]] + headers = [("Link", make_link_header(link)) for 
link in job_links] raise JobGone( + headers=headers, json={ "title": "JobDismissed", "type": "JobDismissed", diff --git a/weaver/wps_restapi/processes/processes.py b/weaver/wps_restapi/processes/processes.py index 05248e626..7e1750205 100644 --- a/weaver/wps_restapi/processes/processes.py +++ b/weaver/wps_restapi/processes/processes.py @@ -14,7 +14,6 @@ ) from pyramid.response import Response from pyramid.settings import asbool -from werkzeug.wrappers.request import Request as WerkzeugRequest from weaver.database import get_db from weaver.exceptions import ProcessNotFound, ServiceException, log_unhandled_exceptions @@ -28,21 +27,12 @@ ) from weaver.processes import opensearch from weaver.processes.constants import ProcessSchema -from weaver.processes.execution import submit_job +from weaver.processes.execution import submit_job, submit_job_dispatch_wps from weaver.processes.utils import deploy_process_from_payload, get_process, update_process_metadata from weaver.status import Status from weaver.store.base import StoreJobs, StoreProcesses -from weaver.utils import ( - clean_json_text_body, - extend_instance, - fully_qualified_name, - get_any_id, - get_header, - get_path_kvp -) +from weaver.utils import clean_json_text_body, fully_qualified_name, get_any_id, get_header from weaver.visibility import Visibility -from weaver.wps.service import get_pywps_service -from weaver.wps.utils import get_wps_path from weaver.wps_restapi import swagger_definitions as sd from weaver.wps_restapi.processes.utils import get_process_list_links, get_processes_filtered_by_valid_schemas from weaver.wps_restapi.providers.utils import get_provider_services @@ -489,20 +479,11 @@ def submit_local_job(request): Execution location and method is according to deployed Application Package. 
""" process = get_process(request=request) - ctype = clean_media_type_format(get_header("content-type", request.headers, default=None), strip_parameters=True) + ctype = get_header("Content-Type", request.headers, default=None) + ctype = clean_media_type_format(ctype, strip_parameters=True) if ctype in ContentType.ANY_XML: - # Send the XML request to the WPS endpoint which knows how to parse it properly. - # Execution will end up in the same 'submit_job_handler' function as other branch for JSON. - service = get_pywps_service() - wps_params = {"version": "1.0.0", "request": "Execute", "service": "WPS", "identifier": process.id} - request.path_info = get_wps_path(request) - request.query_string = get_path_kvp("", **wps_params)[1:] - location = request.application_url + request.path_info + request.query_string - LOGGER.warning("Route redirection [%s] -> [%s] for WPS-XML support.", request.url, location) - http_request = extend_instance(request, WerkzeugRequest) - http_request.shallow = False - return service.call(http_request) - return submit_job(request, process, tags=["wps-rest"]) + return submit_job_dispatch_wps(request, process) + return submit_job(request, process, tags=["wps-rest", "ogc-api"]) def includeme(config): diff --git a/weaver/wps_restapi/providers/providers.py b/weaver/wps_restapi/providers/providers.py index 9aa752ead..57cbacdb0 100644 --- a/weaver/wps_restapi/providers/providers.py +++ b/weaver/wps_restapi/providers/providers.py @@ -17,8 +17,9 @@ from weaver.exceptions import ServiceNotFound, ServiceParsingError, log_unhandled_exceptions from weaver.formats import ContentType, OutputFormat from weaver.owsexceptions import OWSMissingParameterValue, OWSNotImplemented +from weaver.processes.execution import submit_job from weaver.store.base import StoreServices -from weaver.utils import get_any_id, get_settings +from weaver.utils import get_any_id from weaver.wps.utils import get_wps_client from weaver.wps_restapi import swagger_definitions as sd from 
weaver.wps_restapi.processes.utils import get_process_list_links @@ -141,7 +142,8 @@ def remove_provider(request): """ Remove an existing service provider. """ - service, store = get_service(request) + store = get_db(request).get_store(StoreServices) + service = get_service(request) try: store.delete_service(service.name) @@ -165,7 +167,7 @@ def get_provider(request): """ Get a provider definition (GetCapabilities). """ - service, _ = get_service(request) + service = get_service(request) data = get_schema_ref(sd.ProviderSummarySchema, request, ref_name=False) info = service.summary(request) data.update(info) @@ -208,14 +210,12 @@ def describe_provider_process(request): Note: this processes won't be stored to the local process storage. """ - provider_id = request.matchdict.get("provider_id") - process_id = request.matchdict.get("process_id") - store = get_db(request).get_store(StoreServices) - service = store.fetch_by_name(provider_id) + service = get_service(request) # FIXME: support other providers (https://github.com/crim-ca/weaver/issues/130) wps = get_wps_client(service.url, request) - process = wps.describeprocess(process_id) - return Process.convert(process, service, get_settings(request)) + proc_id = request.matchdict.get("process_id") + process = wps.describeprocess(proc_id) + return Process.convert(process, service, container=request) @sd.provider_process_service.get( @@ -278,11 +278,7 @@ def submit_provider_job(request): """ Execute a remote provider process. 
""" - from weaver.processes.execution import submit_job # isort:skip # noqa: E402 # pylint: disable=C0413 - - store = get_db(request).get_store(StoreServices) - provider_id = request.matchdict.get("provider_id") - service = store.fetch_by_name(provider_id) + service = get_service(request) return submit_job(request, service, tags=["wps-rest"]) diff --git a/weaver/wps_restapi/providers/utils.py b/weaver/wps_restapi/providers/utils.py index 41e7f6d82..feee53a02 100644 --- a/weaver/wps_restapi/providers/utils.py +++ b/weaver/wps_restapi/providers/utils.py @@ -11,7 +11,7 @@ from weaver.utils import get_settings if TYPE_CHECKING: - from typing import Any, Callable, List, Tuple + from typing import Any, Callable, List, Optional from weaver.datatype import Service from weaver.typedefs import AnyRequestType, AnySettingsContainer @@ -68,15 +68,15 @@ def forbid_local(container): return forbid_local -def get_service(request): - # type: (AnyRequestType) -> Tuple[Service, StoreServices] +def get_service(request, provider_id=None): + # type: (AnyRequestType, Optional[str]) -> Service """ Get the request service using provider_id from the service store. 
""" store = get_db(request).get_store(StoreServices) - provider_id = request.matchdict.get("provider_id") + prov_id = provider_id or request.matchdict.get("provider_id") try: - service = store.fetch_by_name(provider_id) + service = store.fetch_by_name(prov_id) except ServiceNotFound: - raise HTTPNotFound(f"Provider {provider_id} cannot be found.") - return service, store + raise HTTPNotFound(f"Provider {prov_id} cannot be found.") + return service diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index 1d7fa8fb3..a9f2a2fe9 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -2143,6 +2143,12 @@ class JobStatusEnum(ExtendedSchemaNode): validator = OneOf(JOB_STATUS_CODE_API) +class JobStatusCreate(ExtendedSchemaNode): + schema_type = String + title = "JobStatus" + validator = OneOf(["create"]) + + class JobStatusSearchEnum(ExtendedSchemaNode): schema_type = String title = "JobStatusSearch" @@ -2550,9 +2556,13 @@ class OWSIdentifier(ExtendedSchemaNode, OWSNamespace): name = "Identifier" -class OWSIdentifierList(ExtendedSequenceSchema, OWSNamespace): +class OWSProcessIdentifier(ProcessIdentifier, OWSNamespace): + pass + + +class OWSProcessIdentifierList(ExtendedSequenceSchema, OWSNamespace): name = "Identifiers" - item = OWSIdentifier() + item = OWSProcessIdentifier() class OWSTitle(ExtendedSchemaNode, OWSNamespace): @@ -2585,7 +2595,7 @@ class WPSDescribeProcessPost(WPSOperationPost, WPSNamespace): _schema = f"{OGC_WPS_1_SCHEMAS}/wpsDescribeProcess_request.xsd" name = "DescribeProcess" title = "DescribeProcess" - identifier = OWSIdentifierList( + identifier = OWSProcessIdentifierList( description="Single or comma-separated list of process identifier to describe.", example="example" ) @@ -2602,7 +2612,7 @@ class WPSExecutePost(WPSOperationPost, WPSNamespace): _schema = f"{OGC_WPS_1_SCHEMAS}/wpsExecute_request.xsd" name = "Execute" title = "Execute" - identifier = 
OWSIdentifier(description="Identifier of the process to execute with data inputs.") + identifier = OWSProcessIdentifier(description="Identifier of the process to execute with data inputs.") dataInputs = WPSExecuteDataInputs(description="Data inputs to be provided for process execution.") @@ -2776,7 +2786,7 @@ class ProcessVersion(ExtendedSchemaNode, WPSNamespace): class OWSProcessSummary(ExtendedMappingSchema, WPSNamespace): version = ProcessVersion(name="processVersion", default="None", example="1.2", description="Version of the corresponding process summary.") - identifier = OWSIdentifier(example="example", description="Identifier to refer to the process.") + identifier = OWSProcessIdentifier(example="example", description="Identifier to refer to the process.") _title = OWSTitle(example="Example Process", description="Title of the process.") abstract = OWSAbstract(example="Process for example schema.", description="Detail about the process.") @@ -3014,7 +3024,7 @@ class WPSStatus(ExtendedMappingSchema, WPSNamespace): class WPSProcessSummary(ExtendedMappingSchema, WPSNamespace): name = "Process" title = "Process" - identifier = OWSIdentifier() + identifier = OWSProcessIdentifier() _title = OWSTitle() abstract = OWSAbstract(missing=drop) @@ -3328,10 +3338,19 @@ class ProviderResultsEndpoint(ProviderProcessPath, JobPath): header = RequestHeaders() -class JobResultsEndpoint(ProviderProcessPath, JobPath): +class JobResultsEndpoint(JobPath): header = RequestHeaders() +class JobResultsTriggerExecutionEndpoint(JobResultsEndpoint): + header = RequestHeaders() + body = NoContent() + + +class ProcessJobResultsTriggerExecutionEndpoint(JobResultsTriggerExecutionEndpoint, LocalProcessPath): + pass + + class ProviderExceptionsEndpoint(ProviderProcessPath, JobPath): header = RequestHeaders() @@ -4162,6 +4181,14 @@ class Execute(ExecuteInputOutputs): "value": EXAMPLES["job_execute.json"], }, } + status = JobStatusCreate( + description=( + "Status to request creation of the job 
without submitting it to processing queue " + "and leave it pending until triggered by another results request to start it " + "(see *OGC API - Processes* - Part 4: Job Management)." + ), + missing=drop, + ) mode = JobExecuteModeEnum( missing=drop, default=ExecuteMode.AUTO, @@ -6444,13 +6471,17 @@ class ExecuteHeadersXML(ExecuteHeadersBase): ) -class PostProcessJobsEndpointJSON(LocalProcessPath): +class PostJobsEndpointJSON(ExtendedMappingSchema): header = ExecuteHeadersJSON() querystring = LocalProcessQuery() body = Execute() -class PostProcessJobsEndpointXML(LocalProcessPath): +class PostProcessJobsEndpointJSON(PostJobsEndpointJSON, LocalProcessPath): + pass + + +class PostJobsEndpointXML(ExtendedMappingSchema): header = ExecuteHeadersXML() querystring = LocalProcessQuery() body = WPSExecutePost( @@ -6467,6 +6498,10 @@ class PostProcessJobsEndpointXML(LocalProcessPath): ) +class PostProcessJobsEndpointXML(PostJobsEndpointXML, LocalProcessPath): + pass + + class PagingQueries(ExtendedMappingSchema): page = ExtendedSchemaNode(Integer(allow_string=True), missing=0, default=0, validator=Range(min=0)) limit = ExtendedSchemaNode(Integer(allow_string=True), missing=10, default=10, validator=Range(min=1, max=1000), @@ -6734,7 +6769,7 @@ def __new__(cls, *, name, description, **kwargs): # pylint: disable=W0221 "New schema name must be provided to avoid invalid mixed use of $ref pointers. " f"Name '{name}' is invalid." ) - obj = super().__new__(cls) + obj = super().__new__(cls) # type: ExtendedSchemaNode obj.__init__(name=name, description=description) obj.__class__.__name__ = name obj.children = [ @@ -7129,11 +7164,18 @@ class CreatedJobLocationHeader(ResponseHeaders): class CreatedLaunchJobResponse(ExtendedMappingSchema): - description = "Job successfully submitted to processing queue. Execution should begin when resources are available." + description = ( + "Job successfully submitted. 
" + "Execution should begin when resources are available or when triggered, according to requested execution mode." + ) examples = { "JobAccepted": { - "summary": "Job accepted for execution.", + "summary": "Job accepted for execution asynchronously.", "value": EXAMPLES["job_status_accepted.json"] + }, + "JobCreated": { + "summary": "Job created for later execution by trigger.", + "value": EXAMPLES["job_status_created.json"] } } header = CreatedJobLocationHeader() @@ -7257,6 +7299,7 @@ class NoContentJobResultsHeaders(NoContent): class NoContentJobResultsResponse(ExtendedMappingSchema): + description = "Job completed execution synchronously with results returned in Link headers." header = NoContentJobResultsHeaders() body = NoContent(default="") @@ -7695,9 +7738,9 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema): "501": NotImplementedPostProviderResponse(), } post_provider_process_job_responses = { - "200": CompletedJobResponse(description="success"), - "201": CreatedLaunchJobResponse(description="success"), - "204": NoContentJobResultsResponse(description="success"), + "200": CompletedJobResponse(), + "201": CreatedLaunchJobResponse(), + "204": NoContentJobResultsResponse(), "400": InvalidJobParametersResponse(), "403": ForbiddenProviderAccessResponseSchema(), "405": MethodNotAllowedErrorResponseSchema(), @@ -7705,15 +7748,21 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema): "500": InternalServerErrorResponseSchema(), } post_process_jobs_responses = { - "200": CompletedJobResponse(description="success"), - "201": CreatedLaunchJobResponse(description="success"), - "204": NoContentJobResultsResponse(description="success"), + "200": CompletedJobResponse(), + "201": CreatedLaunchJobResponse(), + "204": NoContentJobResultsResponse(), "400": InvalidJobParametersResponse(), "403": ForbiddenProviderAccessResponseSchema(), "405": MethodNotAllowedErrorResponseSchema(), "406": NotAcceptableErrorResponseSchema(), "500": 
InternalServerErrorResponseSchema(), } +post_jobs_responses = copy(post_process_jobs_responses) +post_job_results_responses = copy(post_process_jobs_responses) +post_job_results_responses.pop("201") # job already created, therefore invalid +post_job_results_responses.update({ + "202": CreatedLaunchJobResponse(), # alternate to '201' for async case since job already exists +}) get_all_jobs_responses = { "200": OkGetQueriedJobsResponse(description="success", examples={ "JobListing": { diff --git a/weaver/xml_util.py b/weaver/xml_util.py index b0605a281..8e19991f9 100644 --- a/weaver/xml_util.py +++ b/weaver/xml_util.py @@ -53,7 +53,7 @@ def fromstring(text, parser=XML_PARSER): - # type: (AnyStr, lxml_etree.XMLParser) -> XML + # type: (AnyStr, lxml_etree.XMLParser) -> XMLTree from weaver.utils import str2bytes return _lxml_fromstring(str2bytes(text), parser=parser) # nosec: B410 From 467965cbf32980ff49a10147184bde57aedd9553 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 11 Oct 2024 20:40:14 -0400 Subject: [PATCH 02/33] fix circular import error --- weaver/wps/service.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/weaver/wps/service.py b/weaver/wps/service.py index 192d53ddd..3dc3945ba 100644 --- a/weaver/wps/service.py +++ b/weaver/wps/service.py @@ -21,7 +21,6 @@ from weaver.formats import ContentType, guess_target_format from weaver.owsexceptions import OWSNoApplicableCode from weaver.processes.convert import wps2json_job_payload -from weaver.processes.execution import submit_job_handler from weaver.processes.types import ProcessType from weaver.processes.utils import get_process from weaver.store.base import StoreProcesses @@ -197,6 +196,8 @@ def _submit_job(self, wps_request): Returns the status response as is if XML, or convert it to JSON, according to request ``Accept`` header. 
""" + from weaver.processes.execution import submit_job_handler # pylint: disable=C0415 # circular import error + req = wps_request.http_request # type: Union[PyramidRequest, WerkzeugRequest] pid = wps_request.identifier ctx = get_wps_output_context(req) # re-validate here in case submitted via WPS endpoint instead of REST-API From efb01da3b78d77d4068b052a57078b57d44c15d5 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 11 Oct 2024 22:37:38 -0400 Subject: [PATCH 03/33] [wip] update endpoints for job update/trigger --- weaver/processes/execution.py | 95 ++++++++++++++++------- weaver/wps_restapi/jobs/jobs.py | 34 +++++--- weaver/wps_restapi/swagger_definitions.py | 60 ++++++++++++-- 3 files changed, 145 insertions(+), 44 deletions(-) diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index c3cd767e8..df2fa4a9c 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -10,7 +10,7 @@ from celery.utils.log import get_task_logger from owslib.util import clean_ows_url from owslib.wps import BoundingBoxDataInput, ComplexDataInput -from pyramid.httpexceptions import HTTPBadRequest, HTTPNotAcceptable +from pyramid.httpexceptions import HTTPBadRequest, HTTPNotAcceptable, HTTPUnprocessableEntity, HTTPUnsupportedMediaType from pyramid_celery import celery_app as app from werkzeug.wrappers.request import Request as WerkzeugRequest @@ -69,7 +69,7 @@ LOGGER = logging.getLogger(__name__) if TYPE_CHECKING: - from typing import Dict, List, Optional, Tuple, Union + from typing import Any, Dict, List, Optional, Tuple, Type, Union from uuid import UUID from celery.app.task import Task @@ -695,17 +695,7 @@ def submit_job(request, reference, tags=None, process_id=None): :func:`submit_job_handler` to provide elements pre-extracted from requests or from other parsing. 
""" # validate body with expected JSON content and schema - if ContentType.APP_JSON not in request.content_type: - raise HTTPBadRequest(json={ - "code": "InvalidHeaderValue", - "name": "Content-Type", - "description": f"Request 'Content-Type' header other than '{ContentType.APP_JSON}' not supported.", - "value": str(request.content_type) - }) - try: - json_body = request.json_body - except Exception as ex: - raise HTTPBadRequest(f"Invalid JSON body cannot be decoded for job submission. [{ex}]") + json_body = validate_job_json(request) # validate context if needed later on by the job for early failure context = get_wps_output_context(request) @@ -763,23 +753,9 @@ def submit_job_handler(payload, # type: ProcessExecution """ Submits the job to the Celery worker with provided parameters. - Assumes that parameters have been pre-fetched and validated, except for the input payload. + Assumes that parameters have been pre-fetched and validated, except for the :paramref:`payload`. """ - try: - json_body = sd.Execute().deserialize(payload) - except colander.Invalid as ex: - raise HTTPBadRequest( - json=sd.ErrorJsonResponseBodySchema(schema_include=True).deserialize({ - "type": "InvalidSchema", - "title": "Execute", - "detail": "Execution body failed schema validation.", - "status": HTTPBadRequest.code, - "error": ex.msg, - "cause": ex.asdict(), - "value": repr_json(ex.value), - }) - ) - + json_body = validate_job_schema(payload) db = get_db(settings) # non-local is only a reference, no actual process object to validate @@ -876,6 +852,67 @@ def submit_job_handler(payload, # type: ProcessExecution return resp +def update_job_parameters(job, request): + # type: (Job, Request) -> None + """ + Updates an existing :term:`Job` with new request parameters. 
+ """ + body = validate_job_json(request) + body = validate_job_schema(body, sd.PatchJobBodySchema) + + raise NotImplementedError # FIXME: implement + + +def validate_job_json(request): + # type: (Request) -> JSON + """ + Validates that the request contains valid :term:`JSON` conctens, but not ncessary valid against expected schema. + + .. seealso:: + :func:`validate_job_schema` + """ + if ContentType.APP_JSON not in request.content_type: + raise HTTPUnsupportedMediaType(json={ + "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-2/1.0/unsupported-media-type", + "title": "Unsupported Media-Type", + "detail": f"Request 'Content-Type' header other than '{ContentType.APP_JSON}' is not supported.", + "code": "InvalidHeaderValue", + "name": "Content-Type", + "value": str(request.content_type) + }) + try: + json_body = request.json_body + except Exception as ex: + raise HTTPBadRequest(json={ + "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-2/1.0/unsupported-media-type", + "title": "Bad Request", + "detail": f"Invalid JSON body cannot be decoded for job submission. [{ex}]", + }) + return json_body + + +def validate_job_schema(payload, body_schema=sd.Execute): + # type: (Any, Union[Type[sd.Execute], Type[sd.PatchJobBodySchema]]) -> ProcessExecution + """ + Validates that the input :term:`Job` payload is valid :term:`JSON` for an execution request. 
+ """ + try: + json_body = body_schema().deserialize(payload) + except colander.Invalid as ex: + raise HTTPUnprocessableEntity( + json=sd.ErrorJsonResponseBodySchema(schema_include=True).deserialize({ + "type": "InvalidSchema", + "title": "Invalid Job Execution Schema", + "detail": "Execution body failed schema validation.", + "status": HTTPBadRequest.code, + "error": ex.msg, + "cause": ex.asdict(), + "value": repr_json(ex.value), + }) + ) + return json_body + + def validate_process_io(process, payload): # type: (Process, ProcessExecution) -> None """ diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index 9193903f4..c275852ed 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -6,6 +6,7 @@ from pyramid.httpexceptions import ( HTTPBadRequest, HTTPOk, + HTTPNoContent, HTTPPermanentRedirect, HTTPUnprocessableEntity, HTTPUnsupportedMediaType @@ -24,12 +25,12 @@ repr_json ) from weaver.processes.convert import convert_input_values_schema, convert_output_params_schema -from weaver.processes.execution import submit_job, submit_job_dispatch_wps, submit_job_handler +from weaver.processes.execution import submit_job, submit_job_dispatch_wps, submit_job_handler, update_job_parameters from weaver.processes.utils import get_process from weaver.processes.wps_package import mask_process_inputs from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory from weaver.store.base import StoreJobs -from weaver.utils import get_header, get_settings +from weaver.utils import get_header, get_settings, make_link_header from weaver.wps_restapi import swagger_definitions as sd from weaver.wps_restapi.jobs.utils import ( dismiss_job_task, @@ -288,7 +289,7 @@ def trigger_job_execution(request): @sd.provider_job_service.get( tags=[sd.TAG_JOBS, sd.TAG_STATUS, sd.TAG_PROVIDERS], - schema=sd.ProviderJobEndpoint(), + schema=sd.GetProviderJobEndpoint(), accept=ContentType.APP_JSON, renderer=OutputFormat.JSON, 
response_schemas=sd.get_prov_single_job_status_responses, @@ -302,7 +303,7 @@ def trigger_job_execution(request): ) @sd.job_service.get( tags=[sd.TAG_JOBS, sd.TAG_STATUS], - schema=sd.JobEndpoint(), + schema=sd.GetJobEndpoint(), accept=ContentType.APP_JSON, renderer=OutputFormat.JSON, response_schemas=sd.get_single_job_status_responses, @@ -339,7 +340,7 @@ def get_job_status(request): renderer=OutputFormat.JSON, response_schemas=sd.patch_job_responses, ) -def update_job(request): +def update_pending_job(request): # type: (PyramidRequest) -> AnyResponseType """ Update a previously created job still pending execution. @@ -347,13 +348,15 @@ def update_job(request): job = get_job(request) raise_job_dismissed(job, request) raise_job_bad_status_locked(job, request) - - raise NotImplementedError # FIXME + update_job_parameters(job, request) + links = job.links(request, self_link="status") + headers = [("Link", make_link_header(link)) for link in links] + return HTTPNoContent(headers=headers) @sd.provider_job_service.delete( tags=[sd.TAG_JOBS, sd.TAG_DISMISS, sd.TAG_PROVIDERS], - schema=sd.ProviderJobEndpoint(), + schema=sd.DeleteProviderJobEndpoint(), accept=ContentType.APP_JSON, renderer=OutputFormat.JSON, response_schemas=sd.delete_prov_job_responses, @@ -367,7 +370,7 @@ def update_job(request): ) @sd.job_service.delete( tags=[sd.TAG_JOBS, sd.TAG_DISMISS], - schema=sd.JobEndpoint(), + schema=sd.DeleteJobEndpoint(), accept=ContentType.APP_JSON, renderer=OutputFormat.JSON, response_schemas=sd.delete_job_responses, @@ -486,7 +489,18 @@ def get_job_inputs(request): if schema: job_inputs = convert_input_values_schema(job_inputs, schema) job_outputs = convert_output_params_schema(job_outputs, schema) - body = {"inputs": job_inputs, "outputs": job_outputs} + job_headers = { + "Accept": job.accept_type, + "Accept-Language": job.accept_language, + + } + body = { + "mode": job.execution_mode, + "response": job.execution_response, + "inputs": job_inputs, + "outputs": job_outputs, 
+ "headers": job_headers, + } body.update({"links": job.links(request, self_link="inputs")}) body = sd.JobInputsBody().deserialize(body) return HTTPOk(json=body) diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index a9f2a2fe9..589c9b80c 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -705,7 +705,7 @@ class RequestHeaders(ExtendedMappingSchema): content_type = RequestContentTypeHeader() -class ResponseHeaders(ResponseContentTypeHeader): +class ResponseHeaders(ExtendedMappingSchema): """ Headers describing resulting response. """ @@ -3236,11 +3236,11 @@ class ProcessVisibilityPutEndpoint(LocalProcessPath): body = VisibilitySchema() -class ProviderJobEndpoint(ProviderProcessPath, JobPath): +class GetProviderJobEndpoint(ProviderProcessPath, JobPath): header = RequestHeaders() -class JobEndpoint(JobPath): +class GetJobEndpoint(JobPath): header = RequestHeaders() @@ -6502,6 +6502,27 @@ class PostProcessJobsEndpointXML(PostJobsEndpointXML, LocalProcessPath): pass +class PatchJobBodySchema(Execute): + description = "Execution request parameters to be updated." 
+ # all parameters that are not 'missing=drop' must be added to allow partial update + inputs = ExecuteInputValues(missing=drop, description=Execute.inputs.description) + outputs = ExecuteOutputSpec(missing=drop, description=Execute.outputs.description) + + +class PatchJobEndpoint(JobPath): + header = RequestHeaders() + querystring = LocalProcessQuery() + body = PatchJobBodySchema() + + +class PatchProcessJobEndpoint(JobPath, ProcessEndpoint): + body = PatchJobBodySchema() + + +class PatchProviderJobEndpoint(PatchProcessJobEndpoint): + header = RequestHeaders() + + class PagingQueries(ExtendedMappingSchema): page = ExtendedSchemaNode(Integer(allow_string=True), missing=0, default=0, validator=Range(min=0)) limit = ExtendedSchemaNode(Integer(allow_string=True), missing=10, default=10, validator=Range(min=1, max=1000), @@ -6595,11 +6616,19 @@ class GetProcessJobEndpoint(LocalProcessPath): querystring = LocalProcessQuery() -class DeleteProcessJobEndpoint(LocalProcessPath): +class DeleteJobEndpoint(JobPath): header = RequestHeaders() querystring = LocalProcessQuery() +class DeleteProcessJobEndpoint(LocalProcessPath): + header = RequestHeaders() + + +class DeleteProviderJobEndpoint(DeleteProcessJobEndpoint, ProviderProcessPath): + pass + + class BillsEndpoint(ExtendedMappingSchema): header = RequestHeaders() @@ -6846,7 +6875,7 @@ class ConflictRequestResponseSchema(ServerErrorBaseResponseSchema): class UnprocessableEntityResponseSchema(ServerErrorBaseResponseSchema): - description = "Wrong format of given parameters." + description = "Wrong format or schema of given parameters." header = ResponseHeaders() body = ErrorJsonResponseBodySchema() @@ -7232,6 +7261,12 @@ class OkDismissJobResponse(ExtendedMappingSchema): body = DismissedJobSchema() +class NoContentJobUpdatedResponse(ExtendedMappingSchema): + description = "Job detail updated with provided parameters." 
+ header = ResponseHeaders() + body = NoContent() + + class OkGetJobStatusResponse(ExtendedMappingSchema): _schema = f"{OGC_API_PROC_PART1_RESPONSES}/Status.yaml" header = ResponseHeaders() @@ -7745,6 +7780,8 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema): "403": ForbiddenProviderAccessResponseSchema(), "405": MethodNotAllowedErrorResponseSchema(), "406": NotAcceptableErrorResponseSchema(), + "415": UnsupportedMediaTypeResponseSchema(), + "422": UnprocessableEntityResponseSchema(), "500": InternalServerErrorResponseSchema(), } post_process_jobs_responses = { @@ -7755,6 +7792,8 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema): "403": ForbiddenProviderAccessResponseSchema(), "405": MethodNotAllowedErrorResponseSchema(), "406": NotAcceptableErrorResponseSchema(), + "415": UnsupportedMediaTypeResponseSchema(), + "422": UnprocessableEntityResponseSchema(), "500": InternalServerErrorResponseSchema(), } post_jobs_responses = copy(post_process_jobs_responses) @@ -7808,6 +7847,17 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema): get_prov_single_job_status_responses.update({ "403": ForbiddenProviderLocalResponseSchema(), }) +patch_job_responses = { + "204": NoContentJobUpdatedResponse(), + "404": NotFoundJobResponseSchema(), + "405": MethodNotAllowedErrorResponseSchema(), + "406": NotAcceptableErrorResponseSchema(), + "415": UnsupportedMediaTypeResponseSchema(), + "422": UnprocessableEntityResponseSchema(), + "500": InternalServerErrorResponseSchema(), +} +patch_process_job_responses = copy(patch_job_responses) +patch_provider_job_responses = copy(patch_job_responses) delete_job_responses = { "200": OkDismissJobResponse(description="success", examples={ "JobDismissedSuccess": { From d5a086bb3337e89189059d3ff8f91d29258246bc Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 11 Oct 2024 23:47:16 -0400 Subject: [PATCH 04/33] [wip] add job parameters for inputs response --- weaver/utils.py | 7 -- 
weaver/wps_restapi/jobs/jobs.py | 21 +++--- weaver/wps_restapi/jobs/utils.py | 9 ++- weaver/wps_restapi/swagger_definitions.py | 85 ++++++++++++++--------- 4 files changed, 73 insertions(+), 49 deletions(-) diff --git a/weaver/utils.py b/weaver/utils.py index 024375fb6..3dd180df2 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -119,10 +119,6 @@ HeadersType, JSON, KVP, -<<<<<<< HEAD - KVP_Item, -======= ->>>>>>> origin/master Link, Literal, Number, @@ -1392,8 +1388,6 @@ def make_link_header( return link -<<<<<<< HEAD -======= def parse_link_header(link_header): # type: (str) -> Link """ @@ -1411,7 +1405,6 @@ def parse_link_header(link_header): return link ->>>>>>> origin/master def get_base_url(url): # type: (str) -> str """ diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index 016ed524c..27aef5fae 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast from box import Box from celery.utils.log import get_task_logger @@ -24,6 +24,7 @@ guess_target_format, repr_json ) +from weaver.processes.constants import JobInputsOutputsSchema from weaver.processes.convert import convert_input_values_schema, convert_output_params_schema from weaver.processes.execution import submit_job, submit_job_dispatch_wps, submit_job_handler, update_job_parameters from weaver.processes.utils import get_process @@ -480,19 +481,22 @@ def get_job_inputs(request): Retrieve the inputs values and outputs definitions of a job. 
""" job = get_job(request) - schema = get_schema_query(request.params.get("schema"), strict=False) + schema = cast( + "JobInputsOutputsSchemaType", + get_schema_query(request.params.get("schema"), strict=False, default=JobInputsOutputsSchema.OGC) + ) job_inputs = job.inputs job_outputs = job.outputs if job.is_local: process = get_process(job.process, request=request) job_inputs = mask_process_inputs(process.package, job_inputs) - if schema: - job_inputs = convert_input_values_schema(job_inputs, schema) - job_outputs = convert_output_params_schema(job_outputs, schema) + job_inputs = convert_input_values_schema(job_inputs, schema) + job_outputs = convert_output_params_schema(job_outputs, schema) job_headers = { "Accept": job.accept_type, "Accept-Language": job.accept_language, - + "Prefer": f"return={job.execution_return}" if job.execution_return else None, + "X-WPS-Output-Context": job.context, } body = { "mode": job.execution_mode, @@ -500,8 +504,9 @@ def get_job_inputs(request): "inputs": job_inputs, "outputs": job_outputs, "headers": job_headers, + "subscribers": job.subscribers, + "links": job.links(request, self_link="inputs"), } - body.update({"links": job.links(request, self_link="inputs")}) body = sd.JobInputsBody().deserialize(body) return HTTPOk(json=body) @@ -536,7 +541,7 @@ def get_job_outputs(request): job = get_job(request) raise_job_dismissed(job, request) raise_job_bad_status_success(job, request) - schema = get_schema_query(request.params.get("schema")) + schema = get_schema_query(request.params.get("schema"), default=JobInputsOutputsSchema.OGC) results, _ = get_results(job, request, schema=schema, link_references=False) outputs = {"outputs": results} outputs.update({"links": job.links(request, self_link="outputs")}) diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index d297f0642..249bb04d5 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -279,10 +279,13 @@ def 
get_job_list_links(job_total, filters, request): return links -def get_schema_query(schema, strict=True): - # type: (Optional[JobInputsOutputsSchemaType], bool) -> Optional[JobInputsOutputsSchemaType] +def get_schema_query( + schema, # type: Optional[JobInputsOutputsSchemaType] + strict=True, # type: bool + default=None, # type: Optional[JobInputsOutputsSchemaType] +): # type: (...) -> Optional[JobInputsOutputsSchemaType] if not schema: - return None + return default # unescape query (eg: "OGC+strict" becomes "OGC string" from URL parsing) schema_checked = cast( "JobInputsOutputsSchemaType", diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index fcce8c8db..8c496e1b9 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -696,6 +696,18 @@ class ResponseContentTypeHeader(ContentTypeHeader): ]) +class PreferHeader(ExtendedSchemaNode): + summary = "Header that describes job execution parameters." + description = ( + "Header that describes the desired execution mode of the process job and desired results. " + "Parameter 'return' indicates the structure and contents in which results should be returned. " + "Parameters 'wait' and 'respond-async' indicate the execution mode of the process job. " + f"For more details, see {DOC_URL}/processes.html#execution-mode and {DOC_URL}/processes.html#execution-results." + ) + name = "Prefer" + schema_type = String + + class RequestHeaders(ExtendedMappingSchema): """ Headers that can indicate how to adjust the behavior and/or result to be provided in the response. @@ -2102,6 +2114,11 @@ class JobExecuteModeEnum(ExtendedSchemaNode): # no default to enforce required input as per OGC-API schemas # default = EXECUTE_MODE_AUTO example = ExecuteMode.ASYNC + description = ( + "Desired execution mode specified directly. This is intended for backward compatibility support. 
" + "To obtain more control over execution mode selection, employ the official Prefer header instead " + f"(see for more details: {DOC_URL}/processes.html#execution-mode)." + ) validator = OneOf(ExecuteMode.values()) @@ -2122,6 +2139,10 @@ class JobResponseOptionsEnum(ExtendedSchemaNode): # no default to enforce required input as per OGC-API schemas # default = ExecuteResponse.DOCUMENT example = ExecuteResponse.DOCUMENT + description = ( + "Indicates the desired representation format of the response. " + f"(see for more details: {DOC_URL}/processes.html#execution-body)." + ) validator = OneOf(ExecuteResponse.values()) @@ -3258,7 +3279,7 @@ class JobInputsOutputsQuery(ExtendedMappingSchema): String(), title="JobInputsOutputsQuerySchema", example=JobInputsOutputsSchema.OGC, - default=JobInputsOutputsSchema.OLD, + default=JobInputsOutputsSchema.OGC, validator=OneOfCaseInsensitive(JobInputsOutputsSchema.values()), summary="Selects the schema employed for representation of submitted job inputs and outputs.", description=( @@ -3281,7 +3302,7 @@ class JobResultsQuery(FormatQuery): String(), title="JobOutputResultsSchema", example=JobInputsOutputsSchema.OGC, - default=JobInputsOutputsSchema.OLD, + default=JobInputsOutputsSchema.OGC, validator=OneOfCaseInsensitive(JobInputsOutputsSchema.values()), summary="Selects the schema employed for representation of job outputs.", description=( @@ -4198,20 +4219,9 @@ class Execute(ExecuteInputOutputs): missing=drop, default=ExecuteMode.AUTO, deprecated=True, - description=( - "Desired execution mode specified directly. This is intended for backward compatibility support. " - "To obtain more control over execution mode selection, employ the official Prefer header instead " - f"(see for more details: {DOC_URL}/processes.html#execution-mode)." 
- ), - validator=OneOf(ExecuteMode.values()) ) response = JobResponseOptionsEnum( missing=drop, # no default to ensure 'Prefer' header vs 'response' body resolution order can be performed - description=( - "Indicates the desired representation format of the response. " - f"(see for more details: {DOC_URL}/processes.html#execution-body)." - ), - validator=OneOf(ExecuteResponse.values()) ) notification_email = Email( missing=drop, @@ -6023,7 +6033,34 @@ class ResultsBody(OneOfKeywordSchema): ] +class WpsOutputContextHeader(ExtendedSchemaNode): + # ok to use 'name' in this case because target 'key' in the mapping must + # be that specific value but cannot have a field named with this format + name = "X-WPS-Output-Context" + description = ( + "Contextual location where to store WPS output results from job execution. ", + "When provided, value must be a directory or sub-directories slug. ", + "Resulting contextual location will be relative to server WPS outputs when no context is provided.", + ) + schema_type = String + missing = drop + example = "my-directory/sub-project" + default = None + + +class JobExecuteHeaders(ExtendedMappingSchema): + description = "Indicates the relevant headers that were supplied for job execution or a null value if omitted." 
+ accept = AcceptHeader(missing=None) + accept_language = AcceptLanguageHeader(missing=None) + content_type = RequestContentTypeHeader(missing=None, default=None) + prefer = PreferHeader(missing=None) + x_wps_output_context = WpsOutputContextHeader(missing=None) + + class JobInputsBody(ExecuteInputOutputs): + mode = JobExecuteModeEnum(default=ExecuteMode.AUTO) + response = JobResponseOptionsEnum(default=None) + headers = JobExecuteHeaders(missing={}) links = LinkList(missing=drop) @@ -6492,23 +6529,9 @@ class PutProcessEndpoint(LocalProcessPath): body = PutProcessBodySchema() -class WpsOutputContextHeader(ExtendedSchemaNode): - # ok to use 'name' in this case because target 'key' in the mapping must - # be that specific value but cannot have a field named with this format - name = "X-WPS-Output-Context" - description = ( - "Contextual location where to store WPS output results from job execution. ", - "When provided, value must be a directory or sub-directories slug. ", - "Resulting contextual location will be relative to server WPS outputs when no context is provided.", - ) - schema_type = String - missing = drop - example = "my-directory/sub-project" - default = None - - class ExecuteHeadersBase(RequestHeaders): description = "Request headers supported for job execution." + prefer = PreferHeader(missing=drop) x_wps_output_context = WpsOutputContextHeader() @@ -6560,12 +6583,12 @@ class PostProcessJobsEndpointXML(PostJobsEndpointXML, LocalProcessPath): class PatchJobBodySchema(Execute): description = "Execution request parameters to be updated." 
# all parameters that are not 'missing=drop' must be added to allow partial update - inputs = ExecuteInputValues(missing=drop, description=Execute.inputs.description) - outputs = ExecuteOutputSpec(missing=drop, description=Execute.outputs.description) + inputs = ExecuteInputValues(missing=drop, description="Input values or references to be updated.") + outputs = ExecuteOutputSpec(missing=drop, description="Output format and transmission mode to be updated.") class PatchJobEndpoint(JobPath): - header = RequestHeaders() + header = JobExecuteHeaders() querystring = LocalProcessQuery() body = PatchJobBodySchema() From f29c0101efcfb269090a107cb9cd18cda9c23a2a Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 15 Oct 2024 14:36:53 -0400 Subject: [PATCH 05/33] refactor 'submit_job_handler' to allow submit from pre-created job rather than parsing request from scratch --- tests/functional/test_celery.py | 10 ++-- tests/functional/test_cli.py | 5 +- tests/functional/test_wps_package.py | 52 ++++++++--------- tests/wps_restapi/test_jobs.py | 6 +- weaver/datatype.py | 38 +++++++++++++ weaver/processes/execution.py | 84 ++++++++++++++++++++-------- weaver/store/base.py | 1 + weaver/store/mongodb.py | 21 ++++--- weaver/wps_restapi/jobs/jobs.py | 10 +++- weaver/wps_restapi/jobs/utils.py | 1 - 10 files changed, 160 insertions(+), 68 deletions(-) diff --git a/tests/functional/test_celery.py b/tests/functional/test_celery.py index ad810f155..3cf6949f6 100644 --- a/tests/functional/test_celery.py +++ b/tests/functional/test_celery.py @@ -50,10 +50,12 @@ def test_celery_registry_resolution(): settings = get_settings_from_testapp(webapp) wps_url = get_wps_url(settings) job_store = get_db(settings).get_store("jobs") - job1 = job_store.save_job(task_id="tmp", process="jsonarray2netcdf", - inputs={"input": {"href": "http://random-dont-care.com/fake.json"}}) - job2 = job_store.save_job(task_id="tmp", process="jsonarray2netcdf", - inputs={"input": {"href": 
"http://random-dont-care.com/fake.json"}}) + job1 = job_store.save_job( + task_id="tmp", process="jsonarray2netcdf", inputs={"input": {"href": "http://random-dont-care.com/fake.json"}} + ) + job2 = job_store.save_job( + task_id="tmp", process="jsonarray2netcdf", inputs={"input": {"href": "http://random-dont-care.com/fake.json"}} + ) with contextlib.ExitStack() as stack: celery_mongo_broker = f"""mongodb://{settings["mongodb.host"]}:{settings["mongodb.port"]}/celery-test""" diff --git a/tests/functional/test_cli.py b/tests/functional/test_cli.py index 6a157bc67..524d6f7d0 100644 --- a/tests/functional/test_cli.py +++ b/tests/functional/test_cli.py @@ -770,8 +770,9 @@ def test_jobs_search_multi_status(self): class TestWeaverCLI(TestWeaverClientBase): def setUp(self): super(TestWeaverCLI, self).setUp() - job = self.job_store.save_job(task_id="12345678-1111-2222-3333-111122223333", process="fake-process", - access=Visibility.PUBLIC) + job = self.job_store.save_job( + task_id="12345678-1111-2222-3333-111122223333", process="fake-process", access=Visibility.PUBLIC + ) job.status = Status.SUCCEEDED self.test_job = self.job_store.update_job(job) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index cdbbcd10d..8af4e09e4 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -3980,7 +3980,7 @@ def test_execute_single_output_response_raw_value_literal(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" # request status instead of results since not expecting 'document' JSON in this case status_url = resp.json["location"] @@ -4031,7 +4031,7 @@ def 
test_execute_single_output_response_raw_value_complex(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert "Preference-Applied" in resp.headers assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") @@ -4080,7 +4080,7 @@ def test_execute_single_output_response_raw_reference_literal(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert "Preference-Applied" in resp.headers assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") @@ -4139,7 +4139,7 @@ def test_execute_single_output_response_raw_reference_complex(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert "Preference-Applied" in resp.headers assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") @@ -4211,7 +4211,7 @@ def test_execute_single_output_multipart_accept_data(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert 
resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert "Preference-Applied" not in resp.headers # rely on location that should be provided to find the job ID @@ -4287,7 +4287,7 @@ def test_execute_single_output_multipart_accept_link(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert "Preference-Applied" not in resp.headers # rely on location that should be provided to find the job ID @@ -4361,7 +4361,7 @@ def test_execute_single_output_multipart_accept_alt_format(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert "Preference-Applied" not in resp.headers # rely on location that should be provided to find the job ID @@ -4403,7 +4403,7 @@ def test_execute_single_output_multipart_accept_alt_format(self): # validate the results can be obtained with the "real" representation result_json = self.app.get(f"/jobs/{job_id}/results/output_json", headers=self.json_headers) - assert result_json.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert result_json.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert result_json.content_type == ContentType.APP_JSON assert result_json.text == "{\"data\":\"test\"}" @@ -4437,7 +4437,7 @@ def test_execute_single_output_response_document_alt_format_yaml(self): path = f"/processes/{p_id}/execution" resp = 
mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert "Preference-Applied" not in resp.headers # rely on location that should be provided to find the job ID @@ -4480,7 +4480,7 @@ def test_execute_single_output_response_document_alt_format_yaml(self): # FIXME: implement (https://github.com/crim-ca/weaver/pull/548) # validate the results can be obtained with the "real" representation result_json = self.app.get(f"/jobs/{job_id}/results/output_json", headers=self.json_headers) - assert result_json.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert result_json.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert result_json.content_type == ContentType.APP_JSON assert result_json.text == "{\"data\":\"test\"}" @@ -4517,7 +4517,7 @@ def test_execute_single_output_response_document_alt_format_json_raw_literal(sel path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert "Preference-Applied" not in resp.headers # rely on location that should be provided to find the job ID @@ -4600,7 +4600,7 @@ def test_execute_single_output_response_document_default_format_json_special(sel path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 200, 
f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert "Preference-Applied" not in resp.headers # rely on location that should be provided to find the job ID @@ -4676,7 +4676,7 @@ def test_execute_multi_output_multipart_accept(self, multipart_header): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert "Preference-Applied" in resp.headers assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") @@ -4757,7 +4757,7 @@ def test_execute_multi_output_multipart_accept_async_not_acceptable(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 406, f"Expected error. Instead got: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 406, f"Expected error. Instead got: [{resp.status_code}]\nReason:\n{resp.text}" assert resp.content_type == ContentType.APP_JSON, "Expect JSON instead of Multipart because of error." assert "Accept header" in resp.json["detail"] assert resp.json["value"] == ContentType.MULTIPART_MIXED @@ -4800,7 +4800,7 @@ def test_execute_multi_output_multipart_accept_async_alt_acceptable(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert resp.content_type == ContentType.APP_JSON, "Expect JSON instead of Multipart because of error." 
assert "status" in resp.json, "Expected a JSON Job Status response." assert "Preference-Applied" in resp.headers @@ -4835,7 +4835,7 @@ def test_execute_multi_output_prefer_header_return_representation(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert "Preference-Applied" in resp.headers assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") @@ -4908,7 +4908,7 @@ def test_execute_multi_output_response_raw_value(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert "Preference-Applied" in resp.headers assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") @@ -4989,7 +4989,7 @@ def test_execute_multi_output_response_raw_reference_default_links(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert "Preference-Applied" in resp.headers assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") @@ -5067,7 +5067,7 @@ def test_execute_multi_output_response_raw_reference_accept_multipart(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, 
data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert "Preference-Applied" in resp.headers assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") @@ -5143,7 +5143,7 @@ def test_execute_multi_output_response_raw_mixed(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert "Preference-Applied" in resp.headers assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") @@ -5232,7 +5232,7 @@ def test_execute_multi_output_prefer_header_return_minimal_defaults(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert "Preference-Applied" in resp.headers assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") @@ -5301,7 +5301,7 @@ def test_execute_multi_output_prefer_header_return_minimal_override_transmission path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert "Preference-Applied" in resp.headers assert 
resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") @@ -5376,7 +5376,7 @@ def test_execute_multi_output_response_document_defaults(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert "Preference-Applied" in resp.headers assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") @@ -5442,7 +5442,7 @@ def test_execute_multi_output_response_document_mixed(self): path = f"/processes/{p_id}/execution" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) - assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" assert "Preference-Applied" in resp.headers assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") @@ -5598,7 +5598,7 @@ def test_execute_application_package_process_with_bucket_results(self): proc_url = f"/processes/{self._testMethodName}/jobs" resp = mocked_sub_requests(self.app, "post_json", proc_url, timeout=5, data=exec_body, headers=self.json_headers, only_local=True) - assert resp.status_code in [200, 201], f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code in [200, 201], f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" status_url = resp.json["location"] job_id = resp.json["jobID"] @@ -5708,7 +5708,7 @@ def test_execute_with_directory_output(self): proc_url = f"/processes/{proc}/jobs" resp = mocked_sub_requests(self.app, "post_json", proc_url, timeout=5, data=exec_body, headers=self.json_headers, only_local=True) - assert resp.status_code in [200, 201], 
f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert resp.status_code in [200, 201], f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" status_url = resp.json["location"] job_id = resp.json["jobID"] diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index 5d8219831..db76a413c 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -179,8 +179,10 @@ def make_job(self, ): # type: (...) -> Job if isinstance(created, str): created = date_parser.parse(created) - job = self.job_store.save_job(task_id=task_id, process=process, service=service, is_workflow=False, - execute_async=True, user_id=user_id, access=access, created=created) + job = self.job_store.save_job( + task_id=task_id, process=process, service=service, is_workflow=False, execute_async=True, user_id=user_id, + access=access, created=created + ) job.status = status if status != Status.ACCEPTED: job.started = job.created + datetime.timedelta(seconds=offset if offset is not None else 0) diff --git a/weaver/datatype.py b/weaver/datatype.py index 8fd31182b..571f97f10 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -859,6 +859,25 @@ def wps_id(self, wps_id): raise TypeError(f"Type 'str' or 'UUID' is required for '{self.__name__}.wps_id'") self["wps_id"] = wps_id + @property + def wps_url(self): + # type: () -> Optional[str] + """ + Service URL reference for :term:`WPS` interface. + + .. seealso:: + - :attr:`Process.processEndpointWPS1` + - :attr:`Service.url` + """ + return self.get("wps_url", None) + + @wps_url.setter + def wps_url(self, service): + # type: (Optional[str]) -> None + if not isinstance(service, str): + raise TypeError(f"Type 'str' is required for '{self.__name__}.wps_url'") + self["wps_url"] = service + @property def service(self): # type: () -> Optional[str] @@ -1071,6 +1090,23 @@ def execution_mode(self, mode): raise ValueError(f"Invalid value for '{self.__name__}.execution_mode'. 
Must be one of {modes}") self["execution_mode"] = mode + @property + def execution_wait(self): + # type: () -> Optional[int] + """ + Execution time (in seconds) to wait for a synchronous response. + """ + if not self.execute_sync: + return None + return self.get("execution_wait") + + @execution_wait.setter + def execution_wait(self, wait): + # type: (Optional[int]) -> None + if wait is not None or not isinstance(wait, int): + raise ValueError(f"Invalid value for '{self.__name__}.execution_wait'. Must be None or an integer.") + self["execution_wait"] = wait + @property def execution_response(self): # type: () -> AnyExecuteResponse @@ -1533,6 +1569,7 @@ def params(self): "id": self.id, "task_id": self.task_id, "wps_id": self.wps_id, + "wps_url": self.wps_url, "service": self.service, "process": self.process, "inputs": self.inputs, @@ -1544,6 +1581,7 @@ def params(self): "execution_response": self.execution_response, "execution_return": self.execution_return, "execution_mode": self.execution_mode, + "execution_wait": self.execution_wait, "is_workflow": self.is_workflow, "created": self.created, "started": self.started, diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index ac27c1ec4..8e7e3e8d0 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -91,6 +91,7 @@ AnyProcessRef, AnyResponseType, AnyServiceRef, + AnySettingsContainer, AnyViewResponse, AnyValueType, CeleryResult, @@ -754,7 +755,7 @@ def submit_job(request, reference, tags=None, process_id=None): def submit_job_handler(payload, # type: ProcessExecution settings, # type: SettingsType - service_url, # type: str + wps_url, # type: str provider=None, # type: Optional[AnyServiceRef] process=None, # type: AnyProcessRef is_workflow=False, # type: bool @@ -767,9 +768,11 @@ def submit_job_handler(payload, # type: ProcessExecution context=None, # type: Optional[str] ): # type: (...) 
-> AnyResponseType """ - Submits the job to the Celery worker with provided parameters. + Parses parameters that define the submitted :term:`Job`, and responds accordingly with the selected execution mode. - Assumes that parameters have been pre-fetched and validated, except for the :paramref:`payload`. + Assumes that parameters have been pre-fetched and validated, except for the :paramref:`payload` containing the + desired inputs and outputs from the :term:`Job`. The selected execution mode looks up the various combinations + of headers and body parameters available across :term:`API` implementations and revisions. """ json_body = validate_job_schema(payload) db = get_db(settings) @@ -820,25 +823,47 @@ def submit_job_handler(payload, # type: ProcessExecution store = db.get_store(StoreJobs) # type: StoreJobs job = store.save_job(task_id=job_status, process=process, service=provider_id, inputs=job_inputs, outputs=job_outputs, is_workflow=is_workflow, is_local=is_local, - execute_async=is_execute_async, execute_response=exec_resp, execute_return=exec_return, + execute_async=is_execute_async, execute_wait=wait, + execute_response=exec_resp, execute_return=exec_return, custom_tags=tags, user_id=user, access=visibility, context=context, subscribers=subscribers, accept_type=accept_type, accept_language=language) job.save_log(logger=LOGGER, message=job_message, status=job_status, progress=0) - + job.wps_url = wps_url job = store.update_job(job) - location_url = job.status_url(settings) + + return submit_job_dispatch_task(job, headers=req_headers, container=settings) + + +def submit_job_dispatch_task( + job, # type: Job + *, # force named keyword arguments after + container, # type: AnySettingsContainer + headers=None, # type: AnyHeadersContainer +): # type: (...) -> AnyResponseType + """ + Submits the :term:`Job` to the :mod:`celery` worker with provided parameters. + + Assumes that parameters have been pre-fetched, validated, and can be resolved from the :term:`Job`.
+ """ + db = get_db(container) + store = db.get_store(StoreJobs) + + location_url = job.status_url(container) resp_headers = {"Location": location_url} - resp_headers.update(applied) + req_headers = copy.deepcopy(headers or {}) task_result = None # type: Optional[CeleryResult] + job_pending_created = job.status == Status.CREATED if not job_pending_created: - wps_url = clean_ows_url(service_url) + wps_url = clean_ows_url(job.wps_url) task_result = execute_process.delay(job_id=job.id, wps_url=wps_url, headers=headers) LOGGER.debug("Celery pending task [%s] for job [%s].", task_result.id, job.id) - if not job_pending_created and not is_execute_async: - LOGGER.debug("Celery task requested as sync if it completes before (wait=%ss)", wait) + + execute_sync = not job_pending_created and not job.execute_async + if execute_sync: + LOGGER.debug("Celery task requested as sync if it completes before (wait=%ss)", job.execution_wait) try: - task_result.wait(timeout=wait) + task_result.wait(timeout=job.execution_wait) except CeleryTaskTimeoutError: pass if task_result.ready(): @@ -846,32 +871,45 @@ def submit_job_handler(payload, # type: ProcessExecution # when sync is successful, it must return the results direct instead of status info # see: https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execute_response if job.status == Status.SUCCEEDED: + _, _, sync_applied = parse_prefer_header_execute_mode(req_headers, [ExecuteControlOption.SYNC]) + if sync_applied: + resp_headers.update(sync_applied) return get_job_results_response( job, request_headers=req_headers, response_headers=resp_headers, - container=settings, + container=container, ) # otherwise return the error status - body = job.json(container=settings) + body = job.json(container=container) body["location"] = location_url resp = get_job_submission_response(body, resp_headers, error=True) return resp else: - LOGGER.debug("Celery task requested as sync took too long to complete (wait=%ss). 
Continue in async.", wait) - # sync not respected, therefore must drop it - # since both could be provided as alternative preferences, drop only async with limited subset - prefer = get_header("Preference-Applied", headers, pop=True) - _, _, async_applied = parse_prefer_header_execute_mode({"Prefer": prefer}, [ExecuteControlOption.ASYNC]) - if async_applied: - resp_headers.update(async_applied) + job.save_log( + logger=LOGGER, + level=logging.WARNING, + message=( + f"Job requested as synchronous execution took too long to complete (wait={job.execution_wait}s). " + "Will resume with asynchronous execution." + ) + ) + job = store.update_job(job) + execute_sync = False + + if not execute_sync: + # either sync was not respected, therefore must drop it, or it was not requested at all + # since both could be provided as alternative preferences, drop only sync with limited subset + _, _, async_applied = parse_prefer_header_execute_mode(req_headers, [ExecuteControlOption.ASYNC]) + if async_applied: + resp_headers.update(async_applied) LOGGER.debug("Celery task submitted to run async.") body = { "jobID": job.id, "processID": job.process, - "providerID": provider_id, # dropped by validator if not applicable - "status": map_status(job_status), + "providerID": job.service, # dropped by validator if not applicable + "status": map_status(job.status), "location": location_url, # for convenience/backward compatibility, but official is Location *header* } resp_headers = update_preference_applied_return_header(job, req_headers, resp_headers) @@ -893,7 +931,7 @@ def update_job_parameters(job, request): def validate_job_json(request): # type: (Request) -> JSON """ - Validates that the request contains valid :term:`JSON` conctens, but not ncessary valid against expected schema. + Validates that the request contains valid :term:`JSON` contents, but not necessarily valid against expected schema. .. 
seealso:: :func:`validate_job_schema` diff --git a/weaver/store/base.py b/weaver/store/base.py index 6f02c95b8..3eeada1e7 100644 --- a/weaver/store/base.py +++ b/weaver/store/base.py @@ -175,6 +175,7 @@ def save_job(self, is_workflow=False, # type: bool is_local=False, # type: bool execute_async=True, # type: bool + execute_wait=None, # type: Optional[int] execute_response=None, # type: Optional[AnyExecuteResponse] execute_return=None, # type: Optional[AnyExecuteReturnPreference] custom_tags=None, # type: Optional[List[str]] diff --git a/weaver/store/mongodb.py b/weaver/store/mongodb.py index 28ade6a53..7b14df55f 100644 --- a/weaver/store/mongodb.py +++ b/weaver/store/mongodb.py @@ -4,7 +4,7 @@ import copy import logging import uuid -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast import pymongo from pymongo.collation import Collation @@ -791,6 +791,7 @@ def save_job(self, is_workflow=False, # type: bool is_local=False, # type: bool execute_async=True, # type: bool + execute_wait=None, # type: Optional[int] execute_response=None, # type: Optional[AnyExecuteResponse] execute_return=None, # type: Optional[AnyExecuteReturnPreference] custom_tags=None, # type: Optional[List[str]] @@ -812,10 +813,11 @@ def save_job(self, tags.append(ProcessType.WORKFLOW) else: tags.append(ProcessType.APPLICATION) - if execute_async: - tags.append(ExecuteMode.ASYNC) + if execute_async in [None, False] and execute_wait: + execute_mode = ExecuteMode.SYNC else: - tags.append(ExecuteMode.SYNC) + execute_mode = ExecuteMode.ASYNC + tags.append(execute_mode) if not access: access = Visibility.PRIVATE @@ -829,7 +831,8 @@ def save_job(self, "inputs": inputs, "outputs": outputs, "status": map_status(Status.ACCEPTED), - "execute_async": execute_async, + "execution_mode": execute_mode, + "execution_wait": execute_wait, "execution_response": execute_response, "execution_return": execute_return, "is_workflow": is_workflow, @@ -1047,6 +1050,7 @@ def _find_jobs_grouped(self, 
pipeline, group_categories): items = found[0]["items"] # convert to Job object where applicable, since pipeline result contains (category, jobs, count) items = [{k: (v if k != "jobs" else [Job(j) for j in v]) for k, v in i.items()} for i in items] + items = cast("JobGroupCategory", items) if has_provider: for group_result in items: group_service = group_result["category"].pop("service", None) @@ -1147,13 +1151,14 @@ def _apply_status_filter(status): statuses = set() for _status in status: if _status in StatusCategory: - category_status = JOB_STATUS_CATEGORIES[StatusCategory[_status]] - statuses = statuses.union(category_status) + status_cat = StatusCategory.get(_status) + category_statuses = JOB_STATUS_CATEGORIES[status_cat] + statuses = statuses.union(category_statuses) else: statuses.add(_status) search_filters["status"] = {"$in": list(statuses)} # type: ignore elif status: - search_filters["status"] = status[0] + search_filters["status"] = str(status[0]) return search_filters @staticmethod diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index 27aef5fae..dcde6d21e 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -26,7 +26,13 @@ ) from weaver.processes.constants import JobInputsOutputsSchema from weaver.processes.convert import convert_input_values_schema, convert_output_params_schema -from weaver.processes.execution import submit_job, submit_job_dispatch_wps, submit_job_handler, update_job_parameters +from weaver.processes.execution import ( + submit_job, + submit_job_dispatch_task, + submit_job_dispatch_wps, + submit_job_handler, + update_job_parameters +) from weaver.processes.utils import get_process from weaver.processes.wps_package import mask_process_inputs from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory @@ -285,7 +291,7 @@ def trigger_job_execution(request): raise_job_bad_status_locked(job, request) # FIXME: reuse job, adjust function or map parameters from 
attributes # FIXME: alt 202 code for accepted on async when triggered this way - return submit_job_handler(request, job) + return submit_job_dispatch_task(job, container=request) @sd.provider_job_service.get( diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 249bb04d5..65ca2ed33 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -1101,7 +1101,6 @@ def get_job_submission_response(body, headers, error=False): "Execution should begin when resources are available." ) body = sd.CreatedJobStatusSchema().deserialize(body) - headers.setdefault("Location", body["location"]) return HTTPCreated(json=body, headerlist=headers) From 00771c7a8efecce61c3ba698038679f0c75c15ed Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 15 Oct 2024 19:12:30 -0400 Subject: [PATCH 06/33] [wip] add test markers for various OAP parts affecting /jobs endpoints + prepare tests to validate (relates to https://github.com/opengeospatial/ogcapi-processes/pull/437, relates to https://github.com/crim-ca/weaver/issues/734, relates to https://github.com/crim-ca/weaver/issues/716) --- setup.cfg | 5 +++ tests/functional/test_wps_package.py | 40 +++++++++++++++++ tests/wps_restapi/test_jobs.py | 65 ++++++++++++++++++++++++---- weaver/utils.py | 3 +- weaver/wps_restapi/api.py | 6 +++ 5 files changed, 109 insertions(+), 10 deletions(-) diff --git a/setup.cfg b/setup.cfg index 4b450a14b..9987df2a1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -60,6 +60,11 @@ markers = slow: mark test to be slow remote: mark test with remote Weaver instance requirement vault: mark test with Vault file feature validation + html: mark test as related to HTML rendering + oap_part1: mark test as 'OGC API - Processes - Part 1: Core' functionalities + oap_part2: mark test as 'OGC API - Processes - Part 2: Deploy, Replace, Undeploy (DRU)' functionalities + oap_part3: mark test as 'OGC API - Processes - Part 3: Workflows and Chaining' functionalities 
+ oap_part4: mark test as 'OGC API - Processes - Part 4: Job Management' functionalities filterwarnings = ignore:No file specified for WPS-1 providers registration:RuntimeWarning ignore:.*configuration setting.*weaver\.cwl_processes_dir.*:RuntimeWarning diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 8af4e09e4..163e420f1 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -3598,6 +3598,7 @@ def fix_result_multipart_indent(results): res_dedent = res_dedent.rstrip("\n ") # last line often indented less because of closing multiline string return res_dedent + @pytest.mark.oap_part1 def test_execute_single_output_prefer_header_return_representation_literal(self): proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) @@ -3644,6 +3645,7 @@ def test_execute_single_output_prefer_header_return_representation_literal(self) }, } + @pytest.mark.oap_part1 def test_execute_single_output_prefer_header_return_representation_complex(self): proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) @@ -3693,6 +3695,7 @@ def test_execute_single_output_prefer_header_return_representation_complex(self) }, } + @pytest.mark.oap_part1 def test_execute_single_output_prefer_header_return_minimal_literal_accept_default(self): """ For single requested output, without ``Accept`` content negotiation, its default format is returned directly. @@ -3746,6 +3749,7 @@ def test_execute_single_output_prefer_header_return_minimal_literal_accept_defau }, } + @pytest.mark.oap_part1 def test_execute_single_output_prefer_header_return_minimal_literal_accept_json(self): """ For single requested output, with ``Accept`` :term:`JSON` content negotiation, document response is returned. 
@@ -3801,6 +3805,7 @@ def test_execute_single_output_prefer_header_return_minimal_literal_accept_json( }, } + @pytest.mark.oap_part1 def test_execute_single_output_prefer_header_return_minimal_complex_accept_default(self): """ For single requested output, without ``Accept`` content negotiation, its default format is returned by link. @@ -3880,6 +3885,7 @@ def test_execute_single_output_prefer_header_return_minimal_complex_accept_defau }, } + @pytest.mark.oap_part1 def test_execute_single_output_prefer_header_return_minimal_complex_accept_json(self): """ For single requested output, with ``Accept`` :term:`JSON` content negotiation, document response is returned. @@ -3955,6 +3961,7 @@ def test_execute_single_output_prefer_header_return_minimal_complex_accept_json( }, } + @pytest.mark.oap_part1 def test_execute_single_output_response_raw_value_literal(self): proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) @@ -3999,6 +4006,7 @@ def test_execute_single_output_response_raw_value_literal(self): }, } + @pytest.mark.oap_part1 def test_execute_single_output_response_raw_value_complex(self): """ Since value transmission is requested for a single output, its :term:`JSON` contents are returned directly. 
@@ -4054,6 +4062,7 @@ def test_execute_single_output_response_raw_value_complex(self): }, } + @pytest.mark.oap_part1 def test_execute_single_output_response_raw_reference_literal(self): proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) @@ -4113,6 +4122,7 @@ def test_execute_single_output_response_raw_reference_literal(self): }, } + @pytest.mark.oap_part1 def test_execute_single_output_response_raw_reference_complex(self): proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) @@ -4172,6 +4182,7 @@ def test_execute_single_output_response_raw_reference_complex(self): }, } + @pytest.mark.oap_part1 def test_execute_single_output_multipart_accept_data(self): """ Validate that requesting multipart for a single output is permitted. @@ -4250,6 +4261,7 @@ def test_execute_single_output_multipart_accept_data(self): }, } + @pytest.mark.oap_part1 def test_execute_single_output_multipart_accept_link(self): """ Validate that requesting multipart for a single output is permitted. 
@@ -4326,6 +4338,7 @@ def test_execute_single_output_multipart_accept_link(self): } # FIXME: implement (https://github.com/crim-ca/weaver/pull/548) + @pytest.mark.oap_part1 @pytest.mark.xfail(reason="not implemented") def test_execute_single_output_multipart_accept_alt_format(self): """ @@ -4408,6 +4421,7 @@ def test_execute_single_output_multipart_accept_alt_format(self): assert result_json.text == "{\"data\":\"test\"}" # FIXME: implement (https://github.com/crim-ca/weaver/pull/548) + @pytest.mark.oap_part1 @pytest.mark.xfail(reason="not implemented") def test_execute_single_output_response_document_alt_format_yaml(self): proc = "EchoResultsTester" @@ -4484,6 +4498,7 @@ def test_execute_single_output_response_document_alt_format_yaml(self): assert result_json.content_type == ContentType.APP_JSON assert result_json.text == "{\"data\":\"test\"}" + @pytest.mark.oap_part1 def test_execute_single_output_response_document_alt_format_json_raw_literal(self): proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) @@ -4555,6 +4570,7 @@ def test_execute_single_output_response_document_alt_format_json_raw_literal(sel # assert result_json.content_type == ContentType.APP_JSON # assert result_json.json == {"data": "test"} + @pytest.mark.oap_part1 def test_execute_single_output_response_document_default_format_json_special(self): """ Validate that a :term:`JSON` output is directly embedded in a ``document`` response also using :term:`JSON`. @@ -4631,6 +4647,7 @@ def test_execute_single_output_response_document_default_format_json_special(sel }, } + @pytest.mark.oap_part1 @parameterized.expand([ ContentType.MULTIPART_ANY, ContentType.MULTIPART_MIXED, @@ -4724,6 +4741,7 @@ def test_execute_multi_output_multipart_accept(self, multipart_header): }, } + @pytest.mark.oap_part1 def test_execute_multi_output_multipart_accept_async_not_acceptable(self): """ When executing the process asynchronously, ``Accept`` with multipart (strictly) is not acceptable. 
@@ -4766,6 +4784,7 @@ def test_execute_multi_output_multipart_accept_async_not_acceptable(self): "in": "headers", } + @pytest.mark.oap_part1 def test_execute_multi_output_multipart_accept_async_alt_acceptable(self): """ When executing the process asynchronously, ``Accept`` with multipart and an alternative is acceptable. @@ -4806,6 +4825,7 @@ def test_execute_multi_output_multipart_accept_async_alt_acceptable(self): assert "Preference-Applied" in resp.headers assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") + @pytest.mark.oap_part1 def test_execute_multi_output_prefer_header_return_representation(self): proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) @@ -4881,6 +4901,7 @@ def test_execute_multi_output_prefer_header_return_representation(self): }, } + @pytest.mark.oap_part1 def test_execute_multi_output_response_raw_value(self): proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) @@ -4954,6 +4975,7 @@ def test_execute_multi_output_response_raw_value(self): }, } + @pytest.mark.oap_part1 def test_execute_multi_output_response_raw_reference_default_links(self): """ All outputs resolved as reference (explicitly or inferred) with raw representation should be all Link headers. @@ -5028,6 +5050,7 @@ def test_execute_multi_output_response_raw_reference_default_links(self): }, } + @pytest.mark.oap_part1 def test_execute_multi_output_response_raw_reference_accept_multipart(self): """ Requesting ``multipart`` explicitly should return it instead of default ``Link`` headers response. 
@@ -5115,6 +5138,7 @@ def test_execute_multi_output_response_raw_reference_accept_multipart(self): }, } + @pytest.mark.oap_part1 def test_execute_multi_output_response_raw_mixed(self): proc = "EchoResultsTester" p_id = self.fully_qualified_test_process_name(proc) @@ -5200,6 +5224,7 @@ def test_execute_multi_output_response_raw_mixed(self): }, } + @pytest.mark.oap_part1 def test_execute_multi_output_prefer_header_return_minimal_defaults(self): """ Test ``Prefer: return=minimal`` with default ``transmissionMode`` resolutions for literal/complex outputs. @@ -5264,6 +5289,7 @@ def test_execute_multi_output_prefer_header_return_minimal_defaults(self): }, } + @pytest.mark.oap_part1 def test_execute_multi_output_prefer_header_return_minimal_override_transmission(self): """ Test ``Prefer: return=minimal`` with ``transmissionMode`` overrides. @@ -5344,6 +5370,7 @@ def test_execute_multi_output_prefer_header_return_minimal_override_transmission }, } + @pytest.mark.oap_part1 def test_execute_multi_output_response_document_defaults(self): """ Test ``response: document`` with default ``transmissionMode`` resolutions for literal/complex outputs. @@ -5408,6 +5435,7 @@ def test_execute_multi_output_response_document_defaults(self): }, } + @pytest.mark.oap_part1 def test_execute_multi_output_response_document_mixed(self): """ Test ``response: document`` with ``transmissionMode`` specified to force convertion of literal/complex outputs. 
@@ -5485,6 +5513,18 @@ def test_execute_multi_output_response_document_mixed(self): }, } + @pytest.mark.oap_part4 + def test_execute_jobs_sync(self): + raise NotImplementedError # FIXME: POST /jobs with 'Prefer: wait=X' and return results directly + + @pytest.mark.oap_part4 + def test_execute_jobs_async(self): + raise NotImplementedError # FIXME: POST /jobs with 'Prefer: respond-async' and GET /jobs/{jobId}/results + + @pytest.mark.oap_part4 + def test_execute_jobs_create_trigger(self): + raise NotImplementedError # FIXME: POST /jobs with 'status:create' and POST /jobs/{jobId}/results to trigger + @pytest.mark.functional class WpsPackageAppWithS3BucketTest(WpsConfigBase, ResourcesUtil): diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index db76a413c..b7f194aa0 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -283,6 +283,7 @@ def check_basic_jobs_grouped_info(response, groups): total += grouped_jobs["count"] assert total == response.json["total"] + @pytest.mark.oap_part1 def test_get_jobs_normal_paged(self): resp = self.app.get(sd.jobs_service.path, headers=self.json_headers) self.check_basic_jobs_info(resp) @@ -324,6 +325,8 @@ def test_get_jobs_detail_grouped(self): for job in grouped_jobs["jobs"]: self.check_job_format(job) + @pytest.mark.html + @pytest.mark.oap_part1 @parameterized.expand([ ({}, ), # detail omitted should apply it for HTML, unlike JSON that returns the simplified listing by default ({"detail": None}, ), @@ -349,6 +352,7 @@ def test_get_jobs_detail_html_enforced(self, params): jobs = [line for line in resp.text.splitlines() if "job-list-item" in line] assert len(jobs) == 6 + @pytest.mark.html def test_get_jobs_groups_html_unsupported(self): groups = ["process", "service"] path = get_path_kvp(sd.jobs_service.path, groups=groups) @@ -426,6 +430,7 @@ def test_get_jobs_valid_grouping_by_provider(self): """ self.template_get_jobs_valid_grouping_by_service_provider("provider") + 
@pytest.mark.oap_part1 def test_get_jobs_links_navigation(self): """ Verifies that relation links update according to context in order to allow natural navigation between responses. @@ -545,6 +550,7 @@ def test_get_jobs_links_navigation(self): assert links["first"].startswith(jobs_url) and limit_kvp in links["first"] and "page=0" in links["first"] assert links["last"].startswith(jobs_url) and limit_kvp in links["last"] and "page=0" in links["last"] + @pytest.mark.oap_part1 def test_get_jobs_page_out_of_range(self): resp = self.app.get(sd.jobs_service.path, headers=self.json_headers) total = resp.json["total"] @@ -609,8 +615,8 @@ def test_get_jobs_by_encrypted_email(self): # verify the email is not in plain text job = self.job_store.fetch_by_id(job_id) - assert job.notification_email != email and job.notification_email is not None - assert decrypt_email(job.notification_email, self.settings) == email, "Email should be recoverable." + assert job.notification_email != email and job.notification_email is not None # noqa + assert decrypt_email(job.notification_email, self.settings) == email, "Email should be recoverable." # noqa # make sure that jobs searched using email are found with encryption transparently for the user path = get_path_kvp(sd.jobs_service.path, detail="true", notification_email=email) @@ -620,6 +626,7 @@ def test_get_jobs_by_encrypted_email(self): assert resp.json["total"] == 1, "Should match exactly 1 email with specified literal string as query param." 
assert resp.json["jobs"][0]["jobID"] == job_id + @pytest.mark.oap_part1 def test_get_jobs_by_type_process(self): path = get_path_kvp(sd.jobs_service.path, type="process") resp = self.app.get(path, headers=self.json_headers) @@ -758,6 +765,7 @@ def test_get_jobs_process_unknown_in_query(self): assert resp.status_code == 404 assert resp.content_type == ContentType.APP_JSON + @pytest.mark.oap_part1 @parameterized.expand([ get_path_kvp( sd.jobs_service.path, @@ -861,9 +869,9 @@ def test_get_jobs_private_service_public_process_forbidden_access_in_query(self) def test_get_jobs_public_service_private_process_forbidden_access_in_query(self): """ - NOTE: - it is up to the remote service to hide private processes - if the process is visible, the a job can be executed and it is automatically considered public + .. note:: + It is up to the remote service to hide private processes. + If the process is visible, the job can be executed and it is automatically considered public. """ path = get_path_kvp(sd.jobs_service.path, service=self.service_public.name, @@ -877,9 +885,9 @@ def test_get_jobs_public_service_private_process_forbidden_access_in_query(self) def test_get_jobs_public_service_no_processes(self): """ - NOTE: - it is up to the remote service to hide private processes - if the process is invisible, no job should have been executed nor can be fetched + .. note:: + It is up to the remote service to hide private processes. + If the process is invisible, no job should have been executed nor can be fetched. """ path = get_path_kvp(sd.jobs_service.path, service=self.service_public.name, @@ -964,6 +972,7 @@ def filter_service(jobs): # type: (Iterable[Job]) -> List[Job] test_values = {"path": path, "access": access, "user_id": user_id} self.assert_equal_with_jobs_diffs(job_result, job_expect, test_values, index=i) + @pytest.mark.oap_part1 def test_jobs_list_with_limit_api(self): """ Test handling of ``limit`` query parameter when listing jobs. 
@@ -982,6 +991,7 @@ def test_jobs_list_with_limit_api(self): assert resp.json["limit"] == limit_parameter assert len(resp.json["jobs"]) <= limit_parameter + @pytest.mark.oap_part1 def test_jobs_list_schema_not_required_fields(self): """ Test that job listing query parameters for filtering results are marked as optional in OpenAPI schema. @@ -1104,6 +1114,7 @@ def test_get_jobs_datetime_interval(self): assert date_parser.parse(resp.json["created"]) >= date_parser.parse(datetime_after) assert date_parser.parse(resp.json["created"]) <= date_parser.parse(datetime_before) + @pytest.mark.oap_part1 def test_get_jobs_datetime_match(self): """ Test that only filtered jobs at a specific time are returned when ``datetime`` query parameter is provided. @@ -1127,6 +1138,7 @@ def test_get_jobs_datetime_match(self): assert resp.content_type == ContentType.APP_JSON assert date_parser.parse(resp.json["created"]) == date_parser.parse(datetime_match) + @pytest.mark.oap_part1 def test_get_jobs_datetime_invalid(self): """ Test that incorrectly formatted ``datetime`` query parameter value is handled. @@ -1144,6 +1156,7 @@ def test_get_jobs_datetime_invalid(self): resp = self.app.get(path, headers=self.json_headers, expect_errors=True) assert resp.status_code == 400 + @pytest.mark.oap_part1 def test_get_jobs_datetime_interval_invalid(self): """ Test that invalid ``datetime`` query parameter value is handled. @@ -1161,6 +1174,7 @@ def test_get_jobs_datetime_interval_invalid(self): resp = self.app.get(path, headers=self.json_headers, expect_errors=True) assert resp.status_code == 422 + @pytest.mark.oap_part1 def test_get_jobs_datetime_before_invalid(self): """ Test that invalid ``datetime`` query parameter value with a range is handled. 
@@ -1177,6 +1191,7 @@ def test_get_jobs_datetime_before_invalid(self): resp = self.app.get(path, headers=self.json_headers, expect_errors=True) assert resp.status_code == 400 + @pytest.mark.oap_part1 def test_get_jobs_duration_min_only(self): test = {"minDuration": 35} path = get_path_kvp(sd.jobs_service.path, **test) @@ -1203,6 +1218,7 @@ def test_get_jobs_duration_min_only(self): expect_jobs = [self.job_info[i].id for i in [8]] self.assert_equal_with_jobs_diffs(result_jobs, expect_jobs, test) + @pytest.mark.oap_part1 def test_get_jobs_duration_max_only(self): test = {"maxDuration": 30} path = get_path_kvp(sd.jobs_service.path, **test) @@ -1224,6 +1240,7 @@ def test_get_jobs_duration_max_only(self): expect_jobs = [self.job_info[i].id for i in expect_idx] self.assert_equal_with_jobs_diffs(result_jobs, expect_jobs, test) + @pytest.mark.oap_part1 def test_get_jobs_duration_min_max(self): # note: avoid range <35s for this test to avoid sudden dynamic duration of 9, 10 becoming within min/max test = {"minDuration": 35, "maxDuration": 60} @@ -1249,6 +1266,7 @@ def test_get_jobs_duration_min_max(self): result_jobs = resp.json["jobs"] assert len(result_jobs) == 0 + @pytest.mark.oap_part1 def test_get_jobs_duration_min_max_invalid(self): test = {"minDuration": 30, "maxDuration": 20} path = get_path_kvp(sd.jobs_service.path, **test) @@ -1270,6 +1288,7 @@ def test_get_jobs_duration_min_max_invalid(self): resp = self.app.get(path, headers=self.json_headers, expect_errors=True) assert resp.status_code in [400, 422] + @pytest.mark.oap_part1 def test_get_jobs_by_status_single(self): test = {"status": Status.SUCCEEDED} path = get_path_kvp(sd.jobs_service.path, **test) @@ -1287,6 +1306,7 @@ def test_get_jobs_by_status_single(self): result_jobs = resp.json["jobs"] self.assert_equal_with_jobs_diffs(result_jobs, expect_jobs, test) + @pytest.mark.oap_part1 def test_get_jobs_by_status_multi(self): test = {"status": f"{Status.SUCCEEDED},{Status.RUNNING}"} path = 
get_path_kvp(sd.jobs_service.path, **test) @@ -1312,6 +1332,7 @@ def test_get_jobs_by_status_invalid(self): assert resp.json["value"]["status"] == status assert "status" in resp.json["cause"] + @pytest.mark.oap_part1 def test_get_job_status_response_process_id(self): """ Verify the processID value in the job status response. @@ -1332,6 +1353,7 @@ def test_get_job_status_response_process_id(self): assert resp.json["processID"] == "process-public" + @pytest.mark.oap_part1 def test_get_job_invalid_uuid(self): """ Test handling of invalid UUID reference to search job. @@ -1350,6 +1372,7 @@ def test_get_job_invalid_uuid(self): assert resp.json["type"].endswith("no-such-job") assert "UUID" in resp.json["detail"] + @pytest.mark.oap_part1 @mocked_dismiss_process() def test_job_dismiss_running_single(self): """ @@ -1388,6 +1411,7 @@ def test_job_dismiss_running_single(self): assert resp.status_code == 410, "Job cannot be dismissed again." assert job.id in resp.json["value"] + @pytest.mark.oap_part1 @mocked_dismiss_process() def test_job_dismiss_complete_single(self): """ @@ -1472,7 +1496,7 @@ def test_job_dismiss_batch(self): def test_job_results_errors(self): """ - Validate errors returned for a incomplete, failed or dismissed job when requesting its results. + Validate errors returned for an incomplete, failed or dismissed job when requesting its results. 
""" job_accepted = self.make_job( task_id="1111-0000-0000-0000", process=self.process_public.identifier, service=None, @@ -1637,6 +1661,7 @@ def test_jobs_inputs_outputs_validations(self): with self.assertRaises(colander.Invalid): sd.Execute().deserialize({"outputs": {"random": {"transmissionMode": "bad"}}}) + @pytest.mark.oap_part4 def test_job_logs_formats(self): path = f"/jobs/{self.job_info[0].id}/logs" resp = self.app.get(path, headers=self.json_headers) @@ -1703,6 +1728,7 @@ def test_job_logs_formats(self): assert "Process" in lines[1] assert "Complete" in lines[2] + @pytest.mark.oap_part4 def test_job_logs_formats_unsupported(self): path = f"/jobs/{self.job_info[0].id}/logs" resp = self.app.get(path, headers={"Accept": ContentType.IMAGE_GEOTIFF}, expect_errors=True) @@ -1742,7 +1768,28 @@ def test_job_statistics_response(self): if job: self.job_store.delete_job(job.id) + @pytest.mark.oap_part4 + def test_job_inputs_response(self): + raise NotImplementedError # FIXME (https://github.com/crim-ca/weaver/issues/734) + + @pytest.mark.oap_part4 + def test_job_outputs_response(self): + raise NotImplementedError # FIXME + + @pytest.mark.oap_part4 + def test_job_run_response(self): + raise NotImplementedError # FIXME + + @pytest.mark.oap_part4 + def test_job_run_response(self): + raise NotImplementedError # FIXME + + @pytest.mark.oap_part4 + def test_job_update_response(self): + raise NotImplementedError # FIXME + +@pytest.mark.oap_part1 @pytest.mark.parametrize( ["results", "expected"], [ diff --git a/weaver/utils.py b/weaver/utils.py index 3dd180df2..8c492040a 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -90,6 +90,7 @@ MutableMapping, NoReturn, Optional, + Sequence, Tuple, Type, TypeVar, @@ -1538,7 +1539,7 @@ def islambda(func): def get_path_kvp(path, sep=",", **params): - # type: (str, str, **AnyValueType) -> str + # type: (str, str, **Union[AnyValueType, Sequence[AnyValueType]]) -> str """ Generates the URL with Key-Value-Pairs (:term:`KVP`) query 
parameters. diff --git a/weaver/wps_restapi/api.py b/weaver/wps_restapi/api.py index 83d57db57..59de68a20 100644 --- a/weaver/wps_restapi/api.py +++ b/weaver/wps_restapi/api.py @@ -367,12 +367,18 @@ def get_conformance(category, settings): f"{ogcapi_proc_core}/conf/ogc-process-description", f"{ogcapi_proc_core}/req/json", f"{ogcapi_proc_core}/req/json/definition", + f"{ogcapi_proc_core}/req/job-list/datetime-definition", + f"{ogcapi_proc_core}/req/job-list/datetime-response", + f"{ogcapi_proc_core}/req/job-list/duration-definition", + f"{ogcapi_proc_core}/req/job-list/duration-response", f"{ogcapi_proc_core}/req/job-list/links", f"{ogcapi_proc_core}/req/job-list/jl-limit-definition", f"{ogcapi_proc_core}/req/job-list/job-list-op", f"{ogcapi_proc_core}/req/job-list/processID-definition", f"{ogcapi_proc_core}/req/job-list/processID-mandatory", f"{ogcapi_proc_core}/req/job-list/processid-response", + f"{ogcapi_proc_core}/req/job-list/status-definition", + f"{ogcapi_proc_core}/req/job-list/status-response", f"{ogcapi_proc_core}/req/job-list/type-definition", f"{ogcapi_proc_core}/req/job-list/type-response", # FIXME: KVP exec (https://github.com/crim-ca/weaver/issues/607, https://github.com/crim-ca/weaver/issues/445) From f9f70d0ef88aeb612f80ff5f395aafa2d089287f Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 16 Oct 2024 01:18:41 -0400 Subject: [PATCH 07/33] [wip] prepare tests for /jobs operations --- tests/functional/test_cli.py | 8 +- tests/functional/test_wps_package.py | 278 +++++++++++++++++++--- tests/functional/utils.py | 27 ++- tests/wps_restapi/test_jobs.py | 174 +++++++++++++- tests/wps_restapi/test_processes.py | 24 +- tests/wps_restapi/test_providers.py | 8 +- weaver/wps_restapi/jobs/jobs.py | 1 - weaver/wps_restapi/swagger_definitions.py | 3 + 8 files changed, 443 insertions(+), 80 deletions(-) diff --git a/tests/functional/test_cli.py b/tests/functional/test_cli.py index 524d6f7d0..d72997a40 100644 --- a/tests/functional/test_cli.py 
+++ b/tests/functional/test_cli.py @@ -986,7 +986,7 @@ def test_deploy_docker_auth_username_password_valid(self): the expected authentication credentials. Re-running this test by itself validates if this case happened. Find a way to make it work seamlessly. Retries sometime works, but it is not guaranteed. """ - p_id = self.fully_qualified_test_process_name() + p_id = self.fully_qualified_test_name() docker_reg = "fake.repo" docker_img = "org/project/private-image:latest" docker_ref = f"{docker_reg}/{docker_img}" @@ -1031,7 +1031,7 @@ def test_deploy_docker_auth_token_valid(self): .. seealso:: :meth:`tests.wps_restapi.test_processes.WpsRestApiProcessesTest.test_deploy_process_CWL_DockerRequirement_auth_header_format` """ - p_id = self.fully_qualified_test_process_name() + p_id = self.fully_qualified_test_name() docker_reg = "fake.repo" docker_img = "org/project/private-image:latest" docker_ref = f"{docker_reg}/{docker_img}" @@ -1073,7 +1073,7 @@ def test_deploy_docker_auth_username_or_password_with_token_invalid(self): All parameter values are themselves valid, only their combination that are not. """ - p_id = self.fully_qualified_test_process_name() + p_id = self.fully_qualified_test_name() docker_reg = "fake.repo" docker_img = "org/project/private-image:latest" docker_ref = f"{docker_reg}/{docker_img}" @@ -1151,7 +1151,7 @@ def test_deploy_docker_auth_username_or_password_missing_invalid(self): All parameter values are themselves valid, only their combination that are not. 
""" - p_id = self.fully_qualified_test_process_name() + p_id = self.fully_qualified_test_name() docker_reg = "fake.repo" docker_img = "org/project/private-image:latest" docker_ref = f"{docker_reg}/{docker_img}" diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 163e420f1..d5e2078db 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -2240,7 +2240,7 @@ def test_execute_job_with_inline_input_values(self): def test_execute_job_with_bbox(self): body = self.retrieve_payload("EchoBoundingBox", "deploy", local=True) - proc = self.fully_qualified_test_process_name(self._testMethodName) + proc = self.fully_qualified_test_name(self._testMethodName) self.deploy_process(body, describe_schema=ProcessSchema.OGC, process_id=proc) data = self.retrieve_payload("EchoBoundingBox", "execute", local=True) @@ -2276,7 +2276,7 @@ def test_execute_job_with_bbox(self): def test_execute_job_with_collection_input_geojson_feature_collection(self): name = "EchoFeatures" body = self.retrieve_payload(name, "deploy", local=True) - proc = self.fully_qualified_test_process_name(self._testMethodName) + proc = self.fully_qualified_test_name(self._testMethodName) self.deploy_process(body, describe_schema=ProcessSchema.OGC, process_id=proc) with contextlib.ExitStack() as stack: @@ -2331,7 +2331,7 @@ def test_execute_job_with_collection_input_geojson_feature_collection(self): def test_execute_job_with_collection_input_ogc_features(self, filter_method, filter_lang, filter_value): name = "EchoFeatures" body = self.retrieve_payload(name, "deploy", local=True) - proc = self.fully_qualified_test_process_name(self._testMethodName) + proc = self.fully_qualified_test_name(self._testMethodName) self.deploy_process(body, describe_schema=ProcessSchema.OGC, process_id=proc) with contextlib.ExitStack() as stack: @@ -3601,7 +3601,7 @@ def fix_result_multipart_indent(results): @pytest.mark.oap_part1 def 
test_execute_single_output_prefer_header_return_representation_literal(self): proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -3648,7 +3648,7 @@ def test_execute_single_output_prefer_header_return_representation_literal(self) @pytest.mark.oap_part1 def test_execute_single_output_prefer_header_return_representation_complex(self): proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -3701,7 +3701,7 @@ def test_execute_single_output_prefer_header_return_minimal_literal_accept_defau For single requested output, without ``Accept`` content negotiation, its default format is returned directly. """ proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -3755,7 +3755,7 @@ def test_execute_single_output_prefer_header_return_minimal_literal_accept_json( For single requested output, with ``Accept`` :term:`JSON` content negotiation, document response is returned. 
""" proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -3823,7 +3823,7 @@ def test_execute_single_output_prefer_header_return_minimal_complex_accept_defau - :func:`test_execute_single_output_prefer_header_return_representation_complex` """ proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -3906,7 +3906,7 @@ def test_execute_single_output_prefer_header_return_minimal_complex_accept_json( using the ``response`` parameter at :term:`Job` execution time, as alternative method to ``Prefer``. """ proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -3964,7 +3964,7 @@ def test_execute_single_output_prefer_header_return_minimal_complex_accept_json( @pytest.mark.oap_part1 def test_execute_single_output_response_raw_value_literal(self): proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -4015,7 +4015,7 @@ def test_execute_single_output_response_raw_value_complex(self): - :func:`test_execute_single_output_prefer_header_return_minimal_complex_accept_json` """ proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -4065,7 +4065,7 @@ def test_execute_single_output_response_raw_value_complex(self): 
@pytest.mark.oap_part1 def test_execute_single_output_response_raw_reference_literal(self): proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -4125,7 +4125,7 @@ def test_execute_single_output_response_raw_reference_literal(self): @pytest.mark.oap_part1 def test_execute_single_output_response_raw_reference_complex(self): proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -4196,7 +4196,7 @@ def test_execute_single_output_multipart_accept_data(self): - :func:`test_execute_single_output_multipart_accept_alt_format` """ proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -4273,7 +4273,7 @@ def test_execute_single_output_multipart_accept_link(self): - :func:`test_execute_single_output_multipart_accept_alt_format` """ proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -4348,7 +4348,7 @@ def test_execute_single_output_multipart_accept_alt_format(self): output representation, based on the ``format`` definition. 
""" proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -4425,7 +4425,7 @@ def test_execute_single_output_multipart_accept_alt_format(self): @pytest.mark.xfail(reason="not implemented") def test_execute_single_output_response_document_alt_format_yaml(self): proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -4501,7 +4501,7 @@ def test_execute_single_output_response_document_alt_format_yaml(self): @pytest.mark.oap_part1 def test_execute_single_output_response_document_alt_format_json_raw_literal(self): proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -4585,7 +4585,7 @@ def test_execute_single_output_response_document_default_format_json_special(sel - :func:`test_execute_single_output_response_document_alt_format_json` """ proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -4661,7 +4661,7 @@ def test_execute_multi_output_multipart_accept(self, multipart_header): - :func:`test_execute_multi_output_multipart_accept_async_not_acceptable` """ proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -4754,7 +4754,7 @@ def test_execute_multi_output_multipart_accept_async_not_acceptable(self): - 
:func:`test_execute_multi_output_multipart_accept_async_alt_acceptable` """ proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -4797,7 +4797,7 @@ def test_execute_multi_output_multipart_accept_async_alt_acceptable(self): - :func:`test_execute_multi_output_multipart_accept_async_not_acceptable` """ proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -4828,7 +4828,7 @@ def test_execute_multi_output_multipart_accept_async_alt_acceptable(self): @pytest.mark.oap_part1 def test_execute_multi_output_prefer_header_return_representation(self): proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -4904,7 +4904,7 @@ def test_execute_multi_output_prefer_header_return_representation(self): @pytest.mark.oap_part1 def test_execute_multi_output_response_raw_value(self): proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -4986,7 +4986,7 @@ def test_execute_multi_output_response_raw_reference_default_links(self): - :func:`test_execute_multi_output_response_raw_reference_accept_multipart` """ proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -5061,7 +5061,7 @@ def 
test_execute_multi_output_response_raw_reference_accept_multipart(self): - :func:`test_execute_multi_output_multipart_accept_async_not_acceptable` """ proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -5141,7 +5141,7 @@ def test_execute_multi_output_response_raw_reference_accept_multipart(self): @pytest.mark.oap_part1 def test_execute_multi_output_response_raw_mixed(self): proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -5230,7 +5230,7 @@ def test_execute_multi_output_prefer_header_return_minimal_defaults(self): Test ``Prefer: return=minimal`` with default ``transmissionMode`` resolutions for literal/complex outputs. """ proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -5299,7 +5299,7 @@ def test_execute_multi_output_prefer_header_return_minimal_override_transmission embedded inline. However, this respects the *preference* vs *enforced* property requirements. """ proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -5376,7 +5376,7 @@ def test_execute_multi_output_response_document_defaults(self): Test ``response: document`` with default ``transmissionMode`` resolutions for literal/complex outputs. 
""" proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -5441,7 +5441,7 @@ def test_execute_multi_output_response_document_mixed(self): Test ``response: document`` with ``transmissionMode`` specified to force convertion of literal/complex outputs. """ proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) @@ -5513,17 +5513,225 @@ def test_execute_multi_output_response_document_mixed(self): }, } + @pytest.mark.oap_part4 + def test_execute_mismatch_process(self): + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + proc = "random-other-process" + proc_other = self.fully_qualified_test_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_content = { + "process": f"https://localhost/processes/{p_id}", + "inputs": {"message": "test"} + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = f"/processes/{proc_other}/execution" # mismatch on purpose + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=self.json_headers, only_local=True) + assert resp.status_code == 400, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" + assert resp.content_type == ContentType.APP_JSON + assert resp.json["cause"] == {"name": "process", "in": "body"} + @pytest.mark.oap_part4 def test_execute_jobs_sync(self): - raise NotImplementedError # FIMXE: POST /jobs with 'Prefer: wait=X' and return results directly + proc = 
"EchoResultsTester" + p_id = self.fully_qualified_test_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + exec_headers = { + "Accept": ContentType.APP_JSON, # response 'document' should be enough to use JSON, but make extra sure + "Content-Type": ContentType.APP_JSON, + } + exec_content = { + "process": f"https://localhost/processes/{p_id}", + "mode": ExecuteMode.SYNC, # force sync to make sure JSON job status is not returned instead + "response": ExecuteResponse.DOCUMENT, + "inputs": { + "message": "test" + }, + "outputs": { + "output_json": { + "transmissionMode": ExecuteTransmissionMode.VALUE, # force convert of the file reference + "format": {"mediaType": ContentType.APP_JSON}, # request output format explicitly + } + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = "/jobs" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" + assert "Preference-Applied" not in resp.headers + + # rely on location that should be provided to find the job ID + results_url = get_header("Content-Location", resp.headers) + assert results_url, ( + "Content-Location should have been provided in" + "results response pointing at where they can be found." 
+ ) + job_id = results_url.rsplit("/results")[0].rsplit("/jobs/")[-1] + assert is_uuid(job_id), f"Failed to retrieve the job ID: [{job_id}] is not a UUID" + out_url = get_wps_output_url(self.settings) + + # validate the results based on original execution request + results = resp + assert results.content_type.startswith(ContentType.APP_JSON) + assert results.json == { + "output_json": { + "mediaType": ContentType.APP_JSON, + "value": {"data": "test"}, + } + } + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_json": { + "href": f"{out_url}/{job_id}/output_json/result.json", + "type": ContentType.APP_JSON, + }, + } @pytest.mark.oap_part4 def test_execute_jobs_async(self): - raise NotImplementedError # FIMXE: POST /jobs with 'Prefer: respond-asny' and GET /jobs/{jobId}/results + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + prefer_header = f"return={ExecuteReturnPreference.MINIMAL}, respond-async" + exec_headers = { + "Prefer": prefer_header + } + exec_headers.update(self.json_headers) + exec_content = { + "process": f"https://localhost/processes/{p_id}", + "inputs": { + "message": "test" + }, + "outputs": { + "output_json": {}, + "output_data": {} + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = "/jobs" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" + assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") + + status_url = resp.json["location"] + 
status = self.monitor_job(status_url, return_status=True) + assert status["status"] == Status.SUCCEEDED + + job_id = status["jobID"] + out_url = get_wps_output_url(self.settings) + results = self.app.get(f"/jobs/{job_id}/results") + results_json = self.remove_result_format(results.json) + assert results.content_type.startswith(ContentType.APP_JSON) + assert results_json == { + "output_data": "test", + "output_json": { + "href": f"{out_url}/{job_id}/output_json/result.json", + "type": ContentType.APP_JSON, + }, + } + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_data": { + "value": "test" + }, + "output_json": { + "href": f"{out_url}/{job_id}/output_json/result.json", + "type": ContentType.APP_JSON, + }, + } @pytest.mark.oap_part4 def test_execute_jobs_create_trigger(self): - raise NotImplementedError # FIMXE: POST /jobs with 'status:create' and POST /jobs/{jobId}/results to trigger + proc = "EchoResultsTester" + p_id = self.fully_qualified_test_name(proc) + body = self.retrieve_payload(proc, "deploy", local=True) + self.deploy_process(body, process_id=p_id) + + prefer_header = f"return={ExecuteReturnPreference.MINIMAL}, respond-async" + exec_headers = { + "Prefer": prefer_header + } + exec_headers.update(self.json_headers) + exec_content = { + "process": f"https://localhost/processes/{p_id}", + "status": "create", # force wait until triggered + "inputs": { + "message": "test" + }, + "outputs": { + "output_json": {}, + "output_data": {} + } + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = "/jobs" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=exec_headers, only_local=True) + assert resp.status_code == 201, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" + 
assert "Preference-Applied" in resp.headers + assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") + + status_url = resp.json["location"] + status = self.monitor_job(status_url, return_status=True) + assert status["status"] == Status.CREATED + + job_id = status["jobID"] + res_path = f"/jobs/{job_id}/results" + res_headers = { + "Accept": ContentType.APP_JSON, + } + resp = mocked_sub_requests(self.app, "post_json", res_path, timeout=5, + data={}, headers=res_headers, only_local=True) + assert resp.status_code == 202, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" + + status = self.monitor_job(status_url, return_status=True) + assert status["status"] == Status.SUCCEEDED + + out_url = get_wps_output_url(self.settings) + results = self.app.get(f"/jobs/{job_id}/results") + results_json = self.remove_result_format(results.json) + assert results.content_type.startswith(ContentType.APP_JSON) + assert results_json == { + "output_data": "test", + "output_json": { + "href": f"{out_url}/{job_id}/output_json/result.json", + "type": ContentType.APP_JSON, + }, + } + outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT}) + assert outputs.content_type.startswith(ContentType.APP_JSON) + assert outputs.json["outputs"] == { + "output_data": { + "value": "test" + }, + "output_json": { + "href": f"{out_url}/{job_id}/output_json/result.json", + "type": ContentType.APP_JSON, + }, + } @pytest.mark.functional @@ -5817,7 +6025,7 @@ def test_execute_with_result_representations(self): .. 
versionadded:: 6.0 """ proc = "EchoResultsTester" - p_id = self.fully_qualified_test_process_name(proc) + p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) diff --git a/tests/functional/utils.py b/tests/functional/utils.py index 2eaea6880..733a7bcee 100644 --- a/tests/functional/utils.py +++ b/tests/functional/utils.py @@ -61,7 +61,21 @@ ReferenceType = Literal["deploy", "describe", "execute", "package", "quotation", "estimator"] -class ResourcesUtil(object): +class GenericUtils(unittest.TestCase): + def fully_qualified_test_name(self, name=""): + """ + Generates a unique name using the current test method full context name and the provided name, if any. + + Normalizes the generated name such that it can be used as a valid :term:`Process` or :term:`Service` ID. + """ + extra_name = f"-{name}" if name else "" + class_name = fully_qualified_name(self) + test_name = f"{class_name}.{self._testMethodName}{extra_name}" + test_name = test_name.replace(".", "-").replace("-_", "_").replace("_-", "-") + return test_name + + +class ResourcesUtil(GenericUtils): @classmethod def request(cls, method, url, *args, **kwargs): # type: (AnyRequestMethod, str, *Any, **Any) -> AnyResponseType @@ -271,7 +285,7 @@ def get_builtin_process_names(): return proc_names -class JobUtils(object): +class JobUtils(GenericUtils): job_store = None job_info = None # type: Iterable[Job] @@ -316,7 +330,7 @@ def assert_equal_with_jobs_diffs(self, ) -class WpsConfigBase(unittest.TestCase): +class WpsConfigBase(GenericUtils): json_headers = MappingProxyType({"Accept": ContentType.APP_JSON, "Content-Type": ContentType.APP_JSON}) html_headers = MappingProxyType({"Accept": ContentType.TEXT_HTML}) xml_headers = MappingProxyType({"Content-Type": ContentType.TEXT_XML}) @@ -440,13 +454,6 @@ def _try_get_logs(self, status_url): return f"Error logs:\n{_text}" return "" - def fully_qualified_test_process_name(self, 
name=""): - extra_name = f"-{name}" if name else "" - class_name = fully_qualified_name(self) - test_name = f"{class_name}.{self._testMethodName}{extra_name}" - test_name = test_name.replace(".", "-").replace("-_", "_").replace("_-", "-") - return test_name - @overload def monitor_job(self, status_url, **__): # type: (str, **Any) -> ExecutionResults diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index b7f194aa0..1fd027d34 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -5,7 +5,6 @@ import os import shutil import tempfile -import unittest import warnings from datetime import date from typing import TYPE_CHECKING @@ -32,7 +31,7 @@ ) from weaver.compat import Version from weaver.datatype import Job, Service -from weaver.execute import ExecuteMode, ExecuteResponse, ExecuteTransmissionMode +from weaver.execute import ExecuteMode, ExecuteResponse, ExecuteReturnPreference, ExecuteTransmissionMode from weaver.formats import ContentType from weaver.notify import decrypt_email from weaver.processes.wps_testing import WpsTestProcess @@ -49,14 +48,14 @@ ) if TYPE_CHECKING: - from typing import Iterable, List, Optional, Tuple, Union + from typing import Any, Iterable, List, Optional, Tuple, Union from weaver.status import AnyStatusType from weaver.typedefs import AnyLogLevel, JSON, Number, Statistics from weaver.visibility import AnyVisibility -class WpsRestApiJobsTest(unittest.TestCase, JobUtils): +class WpsRestApiJobsTest(JobUtils): settings = {} config = None @@ -161,13 +160,14 @@ def setUp(self): user_id=self.user_editor1_id, status=Status.STARTED, progress=99, access=Visibility.PUBLIC) def make_job(self, + *, # force keyword arguments task_id, # type: str process, # type: str service, # type: Optional[str] - user_id, # type: Optional[int] status, # type: AnyStatusType progress, # type: int - access, # type: AnyVisibility + access=None, # type: AnyVisibility + user_id=None, # type: Optional[int] created=None, 
# type: Optional[Union[datetime.datetime, str]] offset=None, # type: Optional[int] duration=None, # type: Optional[int] @@ -176,13 +176,14 @@ def make_job(self, statistics=None, # type: Optional[Statistics] tags=None, # type: Optional[List[str]] add_info=True, # type: bool + **job_params, # type: Any ): # type: (...) -> Job if isinstance(created, str): created = date_parser.parse(created) job = self.job_store.save_job( task_id=task_id, process=process, service=service, is_workflow=False, execute_async=True, user_id=user_id, - access=access, created=created - ) + access=access, created=created, **job_params + ) job.status = status if status != Status.ACCEPTED: job.started = job.created + datetime.timedelta(seconds=offset if offset is not None else 0) @@ -1770,23 +1771,170 @@ def test_job_statistics_response(self): @pytest.mark.oap_part4 def test_job_inputs_response(self): - raise NotImplementedError # FIXME (https://github.com/crim-ca/weaver/issues/734) + path = f"/jobs/{self.job_info[0].id}/inputs" + resp = self.app.get(path, headers=self.json_headers) + assert resp.status_code == 200 + assert resp.json["inputs"] == {"test": "data"} + assert resp.json["outputs"] == {"test": {"transmissionMode": ExecuteTransmissionMode.VALUE}} + assert resp.json["headers"] == { + "Accept": None, + "Accept-Language": None, + "Content-Type": None, + "Prefer": f"return={ExecuteReturnPreference.MINIMAL}", + "X-WPS-Output-Context": "public" + } + assert resp.json["mode"] == ExecuteMode.ASYNC + assert resp.json["response"] == ExecuteResponse.DOCUMENT @pytest.mark.oap_part4 def test_job_outputs_response(self): raise NotImplementedError # FIXME @pytest.mark.oap_part4 + @pytest.mark.xfail(reason="CWL PROV not implemented (https://github.com/crim-ca/weaver/issues/673)") def test_job_run_response(self): - raise NotImplementedError # FIXME + raise NotImplementedError # FIXME (https://github.com/crim-ca/weaver/issues/673) @pytest.mark.oap_part4 - def test_job_run_response(self): - raise 
NotImplementedError # FIXME + def test_job_update_locked(self): + new_job = self.make_job( + task_id=self.fully_qualified_test_name(), process=self.process_public.identifier, service=None, + status=Status.RUNNING, progress=100, access=Visibility.PUBLIC, + inputs={"test": "data"}, outputs={"test": {"transmissionMode": ExecuteTransmissionMode.VALUE}}, + ) + path = f"/jobs/{new_job.id}" + body = {"inputs": {"test": 400}} + resp = self.app.patch(path, params=body, headers=self.json_headers, expect_errors=True) + assert resp.status_code == 423 + assert resp.json["type"] == "http://www.opengis.net/def/exceptions/ogcapi-processes-4/1.0/locked" @pytest.mark.oap_part4 def test_job_update_response(self): - raise NotImplementedError # FIXME + new_job = self.make_job( + task_id=self.fully_qualified_test_name(), process=self.process_public.identifier, service=None, + status=Status.CREATED, progress=100, access=Visibility.PUBLIC, + inputs={"test": "data"}, outputs={"test": {"transmissionMode": ExecuteTransmissionMode.VALUE}}, + ) + + # check precondition job setup + path = f"/jobs/{new_job.id}/inputs" + resp = self.app.get(path, headers=self.json_headers) + assert resp.status_code == 200 + assert resp.json["inputs"] == {"test": "data"} + assert resp.json["outputs"] == {"test": {"transmissionMode": ExecuteTransmissionMode.VALUE}} + assert resp.json["headers"] == { + "Accept": None, + "Accept-Language": None, + "Content-Type": None, + "Prefer": f"return={ExecuteReturnPreference.MINIMAL}", + "X-WPS-Output-Context": None, + } + assert resp.json["mode"] == ExecuteMode.ASYNC + assert resp.json["response"] == ExecuteResponse.DOCUMENT + + # modify job definition + path = f"/jobs/{new_job.id}" + body = { + "inputs": {"test": "modified", "new": 123}, + "outputs": {"test": {"transmissionMode": ExecuteTransmissionMode.REFERENCE}}, + } + headers = { + "Accept": ContentType.APP_JSON, + "Content-Type": ContentType.APP_JSON, + "Prefer": f"return={ExecuteReturnPreference.REPRESENTATION}; 
wait=5", + } + resp = self.app.patch(path, params=body, headers=headers) + assert resp.status_code == 204 + + # validate changes applied and resolved accordingly + resp = self.app.get(path, headers=self.json_headers) + assert resp.status_code == 200 + assert resp.json["inputs"] == {"test": "modified", "new": 123} + assert resp.json["outputs"] == {"test": {"transmissionMode": ExecuteTransmissionMode.REFERENCE}} + assert resp.json["headers"] == { + "Accept": None, + "Accept-Language": None, + "Content-Type": None, + "Prefer": f"return={ExecuteReturnPreference.MINIMAL}", + "X-WPS-Output-Context": "public" + } + assert resp.json["mode"] == ExecuteMode.SYNC, "Should have been modified from 'wait' preference." + assert resp.json["response"] == ExecuteResponse.RAW, "Should have been modified from 'return' preference." + + @pytest.mark.oap_part4 + def test_job_status_alt_openeo_accept_response(self): + """ + Validate retrieval of :term:`Job` status response with alternate value mapping by ``Accept`` header. + """ + assert self.job_info[0].status == Status.SUCCEEDED, "Precondition invalid." + headers = {"Accept": "application/json; profile=openeo"} + path = f"/jobs/{self.job_info[0].id}" + resp = self.app.get(path, headers=headers) + assert resp.status_code == 200 + assert resp.headers["Content-Type"] == "application/json; profile=openeo" + assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL + assert resp.json["status"] == Status.FINISHED + + assert self.job_info[0].status == Status.FAILED, "Precondition invalid." + path = f"/jobs/{self.job_info[1].id}" + resp = self.app.get(path, headers=headers) + assert resp.status_code == 200 + assert resp.headers["Content-Type"] == "application/json; profile=openeo" + assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL + assert resp.json["status"] == Status.ERROR + + assert self.job_info[9].status == Status.RUNNING, "Precondition invalid." 
+ path = f"/jobs/{self.job_info[1].id}" + resp = self.app.get(path, headers=headers) + assert resp.status_code == 200 + assert resp.headers["Content-Type"] == "application/json; profile=openeo" + assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL + assert resp.json["status"] == Status.RUNNING + + assert self.job_info[11].status == Status.ACCEPTED, "Precondition invalid." + path = f"/jobs/{self.job_info[1].id}" + resp = self.app.get(path, headers=headers) + assert resp.status_code == 200 + assert resp.headers["Content-Type"] == "application/json; profile=openeo" + assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL + assert resp.json["status"] == Status.QUEUED + + @pytest.mark.oap_part4 + def test_job_status_alt_openeo_profile_response(self): + """ + Validate retrieval of :term:`Job` status response with alternate value mapping by ``profile`` query parameter. + """ + assert self.job_info[0].status == Status.SUCCEEDED, "Precondition invalid." + path = f"/jobs/{self.job_info[0].id}" + resp = self.app.get(path, headers=self.json_headers, params={"profile": "openeo"}) + assert resp.status_code == 200 + assert resp.headers["Content-Type"] == "application/json; profile=openeo" + assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL + assert resp.json["status"] == Status.FINISHED + + assert self.job_info[0].status == Status.FAILED, "Precondition invalid." + path = f"/jobs/{self.job_info[1].id}" + resp = self.app.get(path, headers=self.json_headers, params={"profile": "openeo"}) + assert resp.status_code == 200 + assert resp.headers["Content-Type"] == "application/json; profile=openeo" + assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL + assert resp.json["status"] == Status.ERROR + + assert self.job_info[9].status == Status.RUNNING, "Precondition invalid." 
+ path = f"/jobs/{self.job_info[1].id}" + resp = self.app.get(path, headers=self.json_headers, params={"profile": "openeo"}) + assert resp.status_code == 200 + assert resp.headers["Content-Type"] == "application/json; profile=openeo" + assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL + assert resp.json["status"] == Status.RUNNING + + assert self.job_info[11].status == Status.ACCEPTED, "Precondition invalid." + path = f"/jobs/{self.job_info[1].id}" + resp = self.app.get(path, headers=self.json_headers, params={"profile": "openeo"}) + assert resp.status_code == 200 + assert resp.headers["Content-Type"] == "application/json; profile=openeo" + assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL + assert resp.json["status"] == Status.QUEUED @pytest.mark.oap_part1 diff --git a/tests/wps_restapi/test_processes.py b/tests/wps_restapi/test_processes.py index ef98f7ae2..6abc3e9f5 100644 --- a/tests/wps_restapi/test_processes.py +++ b/tests/wps_restapi/test_processes.py @@ -151,7 +151,7 @@ def get_process_deploy_template(self, process_id=None, cwl=None, schema=ProcessS to avoid extra package content-specific validations. 
""" if not process_id: - process_id = self.fully_qualified_test_process_name() + process_id = self.fully_qualified_test_name() body = { "processDescription": {}, "deploymentProfileName": "http://www.opengis.net/profiles/eoc/dockerizedApplication", @@ -541,7 +541,7 @@ def test_get_processes_with_providers_error_servers(self, mock_responses): def test_set_jobControlOptions_async_execute(self): path = "/processes" - process_name = self.fully_qualified_test_process_name() + process_name = self.fully_qualified_test_name() process_data = self.get_process_deploy_template(process_name) process_data["processDescription"]["jobControlOptions"] = [ExecuteControlOption.ASYNC] package_mock = mocked_process_package() @@ -557,7 +557,7 @@ def test_set_jobControlOptions_async_execute(self): def test_set_jobControlOptions_sync_execute(self): path = "/processes" - process_name = self.fully_qualified_test_process_name() + process_name = self.fully_qualified_test_name() process_data = self.get_process_deploy_template(process_name) process_data["processDescription"]["jobControlOptions"] = [ExecuteControlOption.SYNC] package_mock = mocked_process_package() @@ -574,7 +574,7 @@ def test_set_jobControlOptions_sync_execute(self): def test_get_processes_invalid_schemas_handled(self): path = "/processes" # deploy valid test process - process_name = self.fully_qualified_test_process_name() + process_name = self.fully_qualified_test_name() process_data = self.get_process_deploy_template(process_name) package_mock = mocked_process_package() with contextlib.ExitStack() as stack: @@ -669,7 +669,7 @@ def test_describe_process_visibility_private(self): assert resp.content_type == ContentType.APP_JSON def test_deploy_process_success(self): - process_name = self.fully_qualified_test_process_name() + process_name = self.fully_qualified_test_name() process_data = self.get_process_deploy_template(process_name) package_mock = mocked_process_package() @@ -684,7 +684,7 @@ def 
test_deploy_process_success(self): assert isinstance(resp.json["deploymentDone"], bool) and resp.json["deploymentDone"] def test_deploy_process_ogc_schema(self): - process_name = self.fully_qualified_test_process_name() + process_name = self.fully_qualified_test_name() process_data = self.get_process_deploy_template(process_name, schema=ProcessSchema.OGC) process_desc = process_data["processDescription"] package_mock = mocked_process_package() @@ -727,7 +727,7 @@ def test_deploy_process_short_name(self): assert resp.json["process"]["id"] == process_name def test_deploy_process_bad_name(self): - process_name = f"{self.fully_qualified_test_process_name()}..." + process_name = f"{self.fully_qualified_test_name()}..." process_data = self.get_process_deploy_template(process_name) package_mock = mocked_process_package() @@ -753,7 +753,7 @@ def test_deploy_process_conflict(self): assert resp.content_type == ContentType.APP_JSON def test_deploy_process_missing_or_invalid_components(self): - process_name = self.fully_qualified_test_process_name() + process_name = self.fully_qualified_test_name() process_data = self.get_process_deploy_template(process_name) package_mock = mocked_process_package() @@ -787,7 +787,7 @@ def test_deploy_process_default_endpoint_wps1(self): """ Validates that the default (localhost) endpoint to execute WPS requests are saved during deployment. 
""" - process_name = self.fully_qualified_test_process_name() + process_name = self.fully_qualified_test_name() process_data = self.get_process_deploy_template(process_name) package_mock = mocked_process_package() @@ -2223,14 +2223,14 @@ def test_delete_process_not_accessible(self): assert resp.content_type == ContentType.APP_JSON def test_delete_process_not_found(self): - name = self.fully_qualified_test_process_name() + name = self.fully_qualified_test_name() path = f"/processes/{name}" resp = self.app.delete_json(path, headers=self.json_headers, expect_errors=True) assert resp.status_code == 404, f"Error: {resp.text}" assert resp.content_type == ContentType.APP_JSON def test_delete_process_bad_name(self): - name = f"{self.fully_qualified_test_process_name()}..." + name = f"{self.fully_qualified_test_name()}..." path = f"/processes/{name}" resp = self.app.delete_json(path, headers=self.json_headers, expect_errors=True) assert resp.status_code == 400, f"Error: {resp.text}" @@ -2438,7 +2438,7 @@ def test_get_process_visibility_expected_response(self): assert "value" not in resp.json def test_get_process_visibility_not_found(self): - path = f"/processes/{self.fully_qualified_test_process_name()}/visibility" + path = f"/processes/{self.fully_qualified_test_name()}/visibility" resp = self.app.get(path, headers=self.json_headers, expect_errors=True) assert resp.status_code == 404 assert resp.content_type == ContentType.APP_JSON diff --git a/tests/wps_restapi/test_providers.py b/tests/wps_restapi/test_providers.py index 4cb249a6a..8fee30831 100644 --- a/tests/wps_restapi/test_providers.py +++ b/tests/wps_restapi/test_providers.py @@ -19,17 +19,15 @@ from weaver.execute import ExecuteControlOption, ExecuteTransmissionMode from weaver.formats import ContentType from weaver.processes.constants import ProcessSchema -from weaver.utils import fully_qualified_name +from tests.functional.utils import GenericUtils -class WpsProviderBase(unittest.TestCase): + +class 
WpsProviderBase(GenericUtils): remote_provider_name = None settings = {} config = None - def fully_qualified_test_process_name(self): - return fully_qualified_name(self).replace(".", "-") - def register_provider(self, clear=True, error=False, data=None): if clear: self.service_store.clear_services() diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index dcde6d21e..960301b56 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -30,7 +30,6 @@ submit_job, submit_job_dispatch_task, submit_job_dispatch_wps, - submit_job_handler, update_job_parameters ) from weaver.processes.utils import get_process diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index 8c496e1b9..bc3f644ac 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -221,6 +221,9 @@ OGC_API_BBOX_FORMAT = "ogc-bbox" # equal CRS:84 and EPSG:4326, equivalent to WGS84 with swapped lat-lon order OGC_API_BBOX_EPSG = "EPSG:4326" +OPENEO_API_SCHEMA_URL = "https://openeo.org/documentation/1.0/developers/api/openapi.yaml" +OPENEO_API_SCHEMA_JOB_STATUS_URL = f"{OPENEO_API_SCHEMA_URL}#/components/schemas/batch_job" + WEAVER_SCHEMA_VERSION = "master" WEAVER_SCHEMA_URL = f"https://raw.githubusercontent.com/crim-ca/weaver/{WEAVER_SCHEMA_VERSION}/weaver/schemas" From 88d730450bad03a66af3a972b6f7c204b047c293 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 16 Oct 2024 16:51:25 -0400 Subject: [PATCH 08/33] add test and fixes for POST /jobs/{jobID}/results trigger execution --- tests/functional/test_wps_package.py | 7 +++++- tests/functional/utils.py | 10 +++++--- weaver/processes/execution.py | 36 +++++++++++++++++++++------- weaver/store/base.py | 3 ++- weaver/store/mongodb.py | 6 +++-- weaver/typedefs.py | 3 ++- weaver/wps_restapi/jobs/jobs.py | 5 ++-- weaver/wps_restapi/jobs/utils.py | 16 +++++++++---- 8 files changed, 61 insertions(+), 25 
deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index d5e2078db..88a5f603c 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -5687,6 +5687,8 @@ def test_execute_jobs_create_trigger(self): with contextlib.ExitStack() as stack: for mock_exec in mocked_execute_celery(): stack.enter_context(mock_exec) + + # create the job, with pending status (not in worker processing queue) path = "/jobs" resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=exec_headers, only_local=True) @@ -5695,9 +5697,10 @@ def test_execute_jobs_create_trigger(self): assert resp.headers["Preference-Applied"] == prefer_header.replace(",", ";") status_url = resp.json["location"] - status = self.monitor_job(status_url, return_status=True) + status = self.monitor_job(status_url, return_status=True, wait_for_status=Status.CREATED) assert status["status"] == Status.CREATED + # trigger the execution (submit the task to worker processing queue) job_id = status["jobID"] res_path = f"/jobs/{job_id}/results" res_headers = { @@ -5706,7 +5709,9 @@ def test_execute_jobs_create_trigger(self): resp = mocked_sub_requests(self.app, "post_json", res_path, timeout=5, data={}, headers=res_headers, only_local=True) assert resp.status_code == 202, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" + assert resp.json["status"] == Status.ACCEPTED + # retrieve the execution status status = self.monitor_job(status_url, return_status=True) assert status["status"] == Status.SUCCEEDED diff --git a/tests/functional/utils.py b/tests/functional/utils.py index 733a7bcee..f5ed5fb16 100644 --- a/tests/functional/utils.py +++ b/tests/functional/utils.py @@ -41,6 +41,7 @@ from pyramid.config import Configurator from webtest import TestApp + from weaver.status import AnyStatusType from weaver.store.mongodb import MongodbJobStore, MongodbProcessStore, MongodbServiceStore from weaver.typedefs 
import ( AnyRequestMethod, @@ -469,7 +470,7 @@ def monitor_job(self, timeout=None, # type: Optional[int] interval=None, # type: Optional[int] return_status=False, # type: bool - wait_for_status=None, # type: Optional[str] + wait_for_status=None, # type: Optional[AnyStatusType] expect_failed=False, # type: bool ): # type: (...) -> Union[ExecutionResults, JobStatusResponse] """ @@ -498,8 +499,11 @@ def check_job_status(_resp, running=False): # type: (AnyResponseType, bool) -> bool body = _resp.json pretty = json.dumps(body, indent=2, ensure_ascii=False) - final = Status.FAILED if expect_failed else Status.SUCCEEDED - statuses = [Status.ACCEPTED, Status.RUNNING, final] if running else [final] + if wait_for_status is None: + final_status = Status.FAILED if expect_failed else Status.SUCCEEDED + else: + final_status = wait_for_status + statuses = [Status.ACCEPTED, Status.RUNNING, final_status] if running else [final_status] assert _resp.status_code == 200, f"Execution failed:\n{pretty}\n{self._try_get_logs(status_url)}" assert body["status"] in statuses, f"Error job info:\n{pretty}\n{self._try_get_logs(status_url)}" return body["status"] in {wait_for_status, Status.SUCCEEDED, Status.FAILED} # break condition diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index 8e7e3e8d0..2e4594173 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -11,7 +11,14 @@ from celery.utils.log import get_task_logger from owslib.util import clean_ows_url from owslib.wps import BoundingBoxDataInput, ComplexDataInput -from pyramid.httpexceptions import HTTPBadRequest, HTTPNotAcceptable, HTTPUnprocessableEntity, HTTPUnsupportedMediaType +from pyramid.httpexceptions import ( + HTTPAccepted, + HTTPBadRequest, + HTTPCreated, + HTTPNotAcceptable, + HTTPUnprocessableEntity, + HTTPUnsupportedMediaType +) from pyramid_celery import celery_app as app from werkzeug.wrappers.request import Request as WerkzeugRequest @@ -809,7 +816,7 @@ def 
submit_job_handler(payload, # type: ProcessExecution req_headers = copy.deepcopy(headers or {}) get_header("prefer", headers, pop=True) # don't care about value, just ensure removed with any header container - job_pending_created = payload.get("status") == "create" + job_pending_created = json_body.get("status") == "create" if job_pending_created: job_status = Status.CREATED job_message = "Job created with pending trigger." @@ -821,7 +828,7 @@ def submit_job_handler(payload, # type: ProcessExecution job_inputs = json_body.get("inputs") job_outputs = json_body.get("outputs") store = db.get_store(StoreJobs) # type: StoreJobs - job = store.save_job(task_id=job_status, process=process, service=provider_id, + job = store.save_job(task_id=job_status, process=process, service=provider_id, status=job_status, inputs=job_inputs, outputs=job_outputs, is_workflow=is_workflow, is_local=is_local, execute_async=is_execute_async, execute_wait=wait, execute_response=exec_resp, execute_return=exec_return, @@ -835,11 +842,12 @@ def submit_job_handler(payload, # type: ProcessExecution def submit_job_dispatch_task( - job, # type: Job - *, # force named keyword arguments after - container, # type: AnySettingsContainer - headers=None, # type: AnyHeadersContainer -): # type: (...) -> AnyResponseType + job, # type: Job + *, # force named keyword arguments after + container, # type: AnySettingsContainer + headers=None, # type: AnyHeadersContainer + force_submit=False, # type: bool +): # type: (...) -> AnyResponseType """ Submits the :term:`Job` to the :mod:`celery` worker with provided parameters. 
@@ -854,6 +862,16 @@ def submit_job_dispatch_task( task_result = None # type: Optional[CeleryResult] job_pending_created = job.status == Status.CREATED + if job_pending_created and force_submit: + # preemptively update job status to avoid next + # dispatch steps ignoring submission to the worker + job.status = Status.ACCEPTED + job = store.update_job(job) + job_pending_created = False + response_class = HTTPAccepted + else: + response_class = HTTPCreated + if not job_pending_created: wps_url = clean_ows_url(job.wps_url) task_result = execute_process.delay(job_id=job.id, wps_url=wps_url, headers=headers) @@ -913,7 +931,7 @@ def submit_job_dispatch_task( "location": location_url, # for convenience/backward compatibility, but official is Location *header* } resp_headers = update_preference_applied_return_header(job, req_headers, resp_headers) - resp = get_job_submission_response(body, resp_headers) + resp = get_job_submission_response(body, resp_headers, response_class=response_class) return resp diff --git a/weaver/store/base.py b/weaver/store/base.py index 3eeada1e7..3ad9ab6a8 100644 --- a/weaver/store/base.py +++ b/weaver/store/base.py @@ -14,7 +14,7 @@ from weaver.datatype import Bill, Job, Process, Quote, Service, VaultFile from weaver.execute import AnyExecuteResponse, AnyExecuteReturnPreference from weaver.sort import AnySortType - from weaver.status import AnyStatusSearch + from weaver.status import AnyStatusSearch, AnyStatusType from weaver.typedefs import ( AnyProcessRef, AnyServiceRef, @@ -186,6 +186,7 @@ def save_job(self, accept_type=None, # type: Optional[str] accept_language=None, # type: Optional[str] created=None, # type: Optional[datetime.datetime] + status=None, # type: Optional[AnyStatusType] ): # type: (...) 
-> Job raise NotImplementedError diff --git a/weaver/store/mongodb.py b/weaver/store/mongodb.py index 7b14df55f..c8b3b4ce0 100644 --- a/weaver/store/mongodb.py +++ b/weaver/store/mongodb.py @@ -66,7 +66,7 @@ from weaver.execute import AnyExecuteResponse, AnyExecuteReturnPreference from weaver.processes.types import AnyProcessType from weaver.sort import AnySortType - from weaver.status import AnyStatusSearch + from weaver.status import AnyStatusSearch, AnyStatusType from weaver.store.base import DatetimeIntervalType, JobGroupCategory, JobSearchResult from weaver.typedefs import ( AnyProcess, @@ -802,6 +802,7 @@ def save_job(self, accept_type=None, # type: Optional[str] accept_language=None, # type: Optional[str] created=None, # type: Optional[datetime.datetime] + status=None, # type: Optional[AnyStatusType] ): # type: (...) -> Job """ Creates a new :class:`Job` and stores it in mongodb. @@ -821,6 +822,7 @@ def save_job(self, if not access: access = Visibility.PRIVATE + status = map_status(Status.get(status, default=Status.ACCEPTED)) process = process.id if isinstance(process, Process) else process service = service.id if isinstance(service, Service) else service new_job = Job({ @@ -830,7 +832,7 @@ def save_job(self, "process": process, # process identifier (WPS request) "inputs": inputs, "outputs": outputs, - "status": map_status(Status.ACCEPTED), + "status": status, "execution_mode": execute_mode, "execution_wait": execute_wait, "execution_response": execute_response, diff --git a/weaver/typedefs.py b/weaver/typedefs.py index 6564a6e18..17574c261 100644 --- a/weaver/typedefs.py +++ b/weaver/typedefs.py @@ -377,7 +377,8 @@ class CWL_SchemaName(Protocol): AnyHeadersCookieContainer = Union[AnyHeadersContainer, AnyCookiesContainer] AnyRequestType = Union[PyramidRequest, WerkzeugRequest, PreparedRequest, RequestsRequest, DummyRequest] AnyResponseType = Union[PyramidResponse, WebobResponse, RequestsResponse, TestResponse] - AnyViewResponse = Union[PyramidResponse, 
WebobResponse, HTTPException, JSON] + AnyResponseClass = Union[PyramidResponse, WebobResponse, HTTPException] + AnyViewResponse = Union[AnyResponseClass, JSON] RequestMethod = Literal[ "HEAD", "GET", "POST", "PUT", "PATCH", "DELETE", "head", "get", "post", "put", "patch", "delete", diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index 960301b56..9bcb91ee8 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -4,6 +4,7 @@ from celery.utils.log import get_task_logger from colander import Invalid from pyramid.httpexceptions import ( + HTTPAccepted, HTTPBadRequest, HTTPOk, HTTPNoContent, @@ -288,9 +289,7 @@ def trigger_job_execution(request): job = get_job(request) raise_job_dismissed(job, request) raise_job_bad_status_locked(job, request) - # FIXME: reuse job, adjust function or map parameters from attributes - # FIXME: alt 202 code for accepted on async when triggered this way - return submit_job_dispatch_task(job, container=request) + return submit_job_dispatch_task(job, container=request, force_submit=True) @sd.provider_job_service.get( diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 65ca2ed33..0bd870651 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -68,7 +68,7 @@ from weaver.wps_restapi.providers.utils import forbid_local_only if TYPE_CHECKING: - from typing import Any, Dict, List, Optional, Sequence, Tuple, Union + from typing import Any, Dict, List, Optional, Sequence, Tuple, Type, Union from weaver.execute import AnyExecuteResponse, AnyExecuteReturnPreference, AnyExecuteTransmissionMode from weaver.formats import AnyContentEncoding @@ -77,6 +77,7 @@ AnyDataStream, AnyHeadersContainer, AnyRequestType, + AnyResponseClass, AnyResponseType, AnySettingsContainer, AnyValueType, @@ -1053,8 +1054,12 @@ def add_result_parts(result_parts): return resp -def get_job_submission_response(body, headers, error=False): - # type: (JSON, 
AnyHeadersContainer, bool) -> Union[HTTPOk, HTTPCreated, HTTPBadRequest] +def get_job_submission_response( + body, # type: JSON + headers, # type: AnyHeadersContainer + error=False, # type: bool + response_class=None, # type: Optional[Type[AnyResponseClass]] +): # type: (...) -> Union[AnyResponseClass, HTTPBadRequest] """ Generates the response contents returned by :term:`Job` submission process. @@ -1083,7 +1088,7 @@ def get_job_submission_response(body, headers, error=False): http_class = HTTPBadRequest http_desc = sd.FailedSyncJobResponse.description else: - http_class = HTTPOk + http_class = response_class or HTTPOk http_desc = sd.CompletedJobResponse.description body = sd.CompletedJobStatusSchema().deserialize(body) @@ -1101,7 +1106,8 @@ def get_job_submission_response(body, headers, error=False): "Execution should begin when resources are available." ) body = sd.CreatedJobStatusSchema().deserialize(body) - return HTTPCreated(json=body, headerlist=headers) + http_class = response_class or HTTPCreated + return http_class(json=body, headerlist=headers) def validate_service_process(request): From c6356bd5e38b28724bbd55448c2aabf1762a7e92 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 17 Oct 2024 16:41:02 -0400 Subject: [PATCH 09/33] patch typing definitions --- tests/functional/test_wps_package.py | 62 ++++++++++++++++------------ tests/functional/utils.py | 7 ++-- weaver/typedefs.py | 9 ++++ 3 files changed, 48 insertions(+), 30 deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 88a5f603c..29cf739c0 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -83,7 +83,14 @@ from responses import RequestsMock - from weaver.typedefs import CWL_AnyRequirements, CWL_RequirementsDict, JSON, Number + from weaver.typedefs import ( + CWL_AnyRequirements, + CWL_RequirementsDict, + JSON, + Number, + ProcessOfferingMapping, + ProcessOfferingListing + ) 
EDAM_PLAIN = f"{EDAM_NAMESPACE}:{EDAM_MAPPING[ContentType.TEXT_PLAIN]}" OGC_NETCDF = f"{OGC_NAMESPACE}:{OGC_MAPPING[ContentType.APP_NETCDF]}" @@ -169,7 +176,7 @@ def test_deploy_ogc_schema(self): # even if deployed as OGC schema, OLD schema can be converted back desc = self.describe_process(self._testMethodName, ProcessSchema.OLD) - proc = desc["process"] + proc = desc["process"] # type: ProcessOfferingListing assert "inputs" in proc and isinstance(proc["inputs"], list) and len(proc["inputs"]) == 1 assert "outputs" in proc and isinstance(proc["outputs"], list) and len(proc["outputs"]) == 1 assert proc["inputs"][0]["id"] == "url" @@ -608,7 +615,7 @@ def test_deploy_merge_literal_io_from_package(self): "executionUnit": [{"unit": cwl}], } desc, _ = self.deploy_process(body, describe_schema=ProcessSchema.OLD) - proc = desc["process"] + proc = desc["process"] # type: ProcessOfferingListing assert proc["id"] == self._testMethodName assert proc["title"] == "some title" @@ -707,7 +714,7 @@ def test_deploy_merge_literal_io_from_package_and_offering(self): "executionUnit": [{"unit": cwl}], } desc, pkg = self.deploy_process(body, describe_schema=ProcessSchema.OLD) - proc = desc["process"] + proc = desc["process"] # type: ProcessOfferingListing assert proc["id"] == self._testMethodName assert proc["title"] == "some title" @@ -865,7 +872,7 @@ def test_deploy_merge_complex_io_format_references(self): }}], } desc, pkg = self.deploy_process(body, describe_schema=ProcessSchema.OLD) - proc = desc["process"] + proc = desc["process"] # type: ProcessOfferingListing assert proc["inputs"][0]["id"] == "wps_only_format_exists" assert len(proc["inputs"][0]["formats"]) == 1 @@ -989,7 +996,7 @@ def test_deploy_merge_mediatype_io_format_references(self): }] } desc, _ = self.deploy_process(body, describe_schema=ProcessSchema.OLD) - proc = desc["process"] + proc = desc["process"] # type: ProcessOfferingListing assert proc["inputs"][0]["id"] == "wps_format_mimeType" assert 
proc["inputs"][0]["formats"][0]["mediaType"] == ContentType.APP_JSON assert proc["inputs"][1]["id"] == "wps_format_mediaType" @@ -1418,7 +1425,7 @@ def test_deploy_merge_complex_io_with_multiple_formats_and_defaults(self): "executionUnit": [{"unit": cwl}], } desc, pkg = self.deploy_process(body, describe_schema=ProcessSchema.OLD) - proc = desc["process"] + proc = desc["process"] # type: ProcessOfferingListing # process description input validation assert proc["inputs"][0]["id"] == "single_value_single_format" @@ -1660,7 +1667,7 @@ def test_deploy_merge_resolution_io_min_max_occurs(self): "executionUnit": [{"unit": cwl}], } desc, pkg = self.deploy_process(body, describe_schema=ProcessSchema.OLD) - proc = desc["process"] + proc = desc["process"] # type: ProcessOfferingListing assert proc["inputs"][0]["id"] == "required_literal" assert proc["inputs"][0]["minOccurs"] == 1 @@ -1795,7 +1802,7 @@ def test_deploy_merge_valid_io_min_max_occurs_as_str_or_int(self): self.fail("MinOccurs/MaxOccurs values defined as valid int/str should not raise an invalid schema error") inputs = body["processDescription"]["inputs"] # type: List[JSON] - proc = desc["process"] + proc = desc["process"] # type: ProcessOfferingListing assert isinstance(proc["inputs"], list) assert len(proc["inputs"]) == len(inputs) for i, process_input in enumerate(inputs): @@ -1843,23 +1850,24 @@ def test_deploy_merge_wps_io_as_mappings(self): "executionUnit": [{"unit": cwl}], } desc, _ = self.deploy_process(body, describe_schema=ProcessSchema.OGC) + proc = desc # type: ProcessOfferingMapping - assert isinstance(desc["inputs"], dict) - assert len(desc["inputs"]) == len(body["processDescription"]["process"]["inputs"]) - assert isinstance(desc["outputs"], dict) - assert len(desc["outputs"]) == len(body["processDescription"]["process"]["outputs"]) + assert isinstance(proc["inputs"], dict) + assert len(proc["inputs"]) == len(body["processDescription"]["process"]["inputs"]) + assert isinstance(proc["outputs"], dict) + 
assert len(proc["outputs"]) == len(body["processDescription"]["process"]["outputs"]) # following inputs metadata were correctly parsed from WPS mapping entries if defined and not using defaults - assert desc["inputs"]["input_num"]["title"] == "Input numbers" - assert desc["inputs"]["input_num"]["maxOccurs"] == 20 - assert desc["inputs"]["input_num"]["literalDataDomains"][0]["dataType"]["name"] == "float" - assert desc["inputs"]["input_file"]["title"] == "Test File" - assert desc["inputs"]["input_file"]["formats"][0]["mediaType"] == ContentType.APP_ZIP - assert desc["outputs"]["values"]["title"] == "Test Output" - assert desc["outputs"]["values"]["description"] == "CSV raw values" - assert desc["outputs"]["values"]["literalDataDomains"][0]["dataType"]["name"] == "string" - assert desc["outputs"]["out_file"]["title"] == "Result File" - assert desc["outputs"]["out_file"]["formats"][0]["mediaType"] == "text/csv" + assert proc["inputs"]["input_num"]["title"] == "Input numbers" + assert proc["inputs"]["input_num"]["maxOccurs"] == 20 + assert proc["inputs"]["input_num"]["literalDataDomains"][0]["dataType"]["name"] == "float" + assert proc["inputs"]["input_file"]["title"] == "Test File" + assert proc["inputs"]["input_file"]["formats"][0]["mediaType"] == ContentType.APP_ZIP + assert proc["outputs"]["values"]["title"] == "Test Output" + assert proc["outputs"]["values"]["description"] == "CSV raw values" + assert proc["outputs"]["values"]["literalDataDomains"][0]["dataType"]["name"] == "string" + assert proc["outputs"]["out_file"]["title"] == "Result File" + assert proc["outputs"]["out_file"]["formats"][0]["mediaType"] == "text/csv" def test_execute_job_with_accept_languages(self): """ @@ -3014,7 +3022,7 @@ def test_deploy_merge_complex_io_from_package(self): "executionUnit": [{"unit": cwl}], } desc, _ = self.deploy_process(body, describe_schema=ProcessSchema.OLD) - proc = desc["process"] + proc = desc["process"] # type: ProcessOfferingListing assert proc["id"] == 
self._testMethodName assert proc["title"] == "some title" assert proc["description"] == "this is a test" @@ -3111,7 +3119,7 @@ def test_deploy_merge_complex_io_from_package_and_offering(self): "executionUnit": [{"unit": cwl}], } desc, pkg = self.deploy_process(body, describe_schema=ProcessSchema.OLD) - proc = desc["process"] + proc = desc["process"] # type: ProcessOfferingListing assert proc["id"] == self._testMethodName assert proc["title"] == "some title" @@ -3185,7 +3193,7 @@ def test_deploy_literal_and_complex_io_from_wps_xml_reference(self): # basic contents validation assert "cwlVersion" in pkg assert "process" in desc - proc = desc["process"] + proc = desc["process"] # type: ProcessOfferingListing assert proc["id"] == self._testMethodName # package I/O validation @@ -3278,7 +3286,7 @@ def test_deploy_enum_array_and_multi_format_inputs_from_wps_xml_reference(self): # basic contents validation assert "cwlVersion" in pkg assert "process" in desc - proc = desc["process"] + proc = desc["process"] # type: ProcessOfferingListing assert proc["id"] == self._testMethodName # package I/O validation diff --git a/tests/functional/utils.py b/tests/functional/utils.py index f5ed5fb16..d51aa172f 100644 --- a/tests/functional/utils.py +++ b/tests/functional/utils.py @@ -41,6 +41,7 @@ from pyramid.config import Configurator from webtest import TestApp + from weaver.processes.constants import ProcessSchemaOGCType, ProcessSchemaOLDType, ProcessSchemaType from weaver.status import AnyStatusType from weaver.store.mongodb import MongodbJobStore, MongodbProcessStore, MongodbServiceStore from weaver.typedefs import ( @@ -378,7 +379,7 @@ def describe_process(cls, process_id, describe_schema=ProcessSchema.OGC): def deploy_process(cls, payload, # type: JSON process_id=None, # type: Optional[str] - describe_schema=ProcessSchema.OGC, # type: Literal[ProcessSchema.OGC] # noqa + describe_schema=ProcessSchema.OGC, # type: ProcessSchemaOGCType mock_requests_only_local=True, # type: bool 
add_package_requirement=True, # type: bool ): # type: (...) -> Tuple[ProcessDescriptionMapping, CWL] @@ -389,7 +390,7 @@ def deploy_process(cls, def deploy_process(cls, payload, # type: JSON process_id=None, # type: Optional[str] - describe_schema=ProcessSchema.OGC, # type: Literal[ProcessSchema.OLD] # noqa + describe_schema=ProcessSchema.OGC, # type: ProcessSchemaOLDType mock_requests_only_local=True, # type: bool add_package_requirement=True, # type: bool ): # type: (...) -> Tuple[ProcessDescriptionListing, CWL] @@ -399,7 +400,7 @@ def deploy_process(cls, def deploy_process(cls, payload, # type: JSON process_id=None, # type: Optional[str] - describe_schema=ProcessSchema.OGC, # type: ProcessSchema + describe_schema=ProcessSchema.OGC, # type: ProcessSchemaType mock_requests_only_local=True, # type: bool add_package_requirement=True, # type: bool ): # type: (...) -> Tuple[ProcessDescription, CWL] diff --git a/weaver/typedefs.py b/weaver/typedefs.py index 17574c261..9ad88a300 100644 --- a/weaver/typedefs.py +++ b/weaver/typedefs.py @@ -894,6 +894,14 @@ class CWL_SchemaName(Protocol): "schema": NotRequired[Union[str, OpenAPISchema]], "default": NotRequired[bool], }, total=False) + LiteralDataDomainDataType = TypedDict("LiteralDataDomainDataType", { + "name": Required[str] + }) + LiteralDataDomainType = TypedDict("LiteralDataDomainType", { + "dataType": Required[LiteralDataDomainDataType], + "valueDefinition": NotRequired[AnyValueType], + "defaultValue": NotRequired[AnyValueType], + }, total=False) ProcessInputOutputItem = TypedDict("ProcessInputOutputItem", { "id": str, "title": NotRequired[str], @@ -902,6 +910,7 @@ class CWL_SchemaName(Protocol): "metadata": NotRequired[List[Metadata]], "schema": NotRequired[OpenAPISchema], "formats": NotRequired[List[FormatMediaType]], + "literalDataDomains": NotRequired[List[LiteralDataDomainType]], "minOccurs": int, "maxOccurs": Union[int, Literal["unbounded"]], }, total=False) From c587dac3c54851585932c26254224bc85dfca49e Mon Sep 
17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 17 Oct 2024 19:37:17 -0400 Subject: [PATCH 10/33] add job update operations --- tests/wps_restapi/test_jobs.py | 43 ++++++++-- weaver/processes/execution.py | 99 ++++++++++++++++++++++- weaver/typedefs.py | 1 + weaver/wps_restapi/swagger_definitions.py | 44 ++++++++-- 4 files changed, 174 insertions(+), 13 deletions(-) diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index 1fd027d34..0a431d122 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -30,7 +30,7 @@ setup_mongodb_servicestore ) from weaver.compat import Version -from weaver.datatype import Job, Service +from weaver.datatype import Job, Process, Service from weaver.execute import ExecuteMode, ExecuteResponse, ExecuteReturnPreference, ExecuteTransmissionMode from weaver.formats import ContentType from weaver.notify import decrypt_email @@ -1804,7 +1804,7 @@ def test_job_update_locked(self): ) path = f"/jobs/{new_job.id}" body = {"inputs": {"test": 400}} - resp = self.app.patch(path, params=body, headers=self.json_headers, expect_errors=True) + resp = self.app.patch_json(path, params=body, headers=self.json_headers, expect_errors=True) assert resp.status_code == 423 assert resp.json["type"] == "http://www.opengis.net/def/exceptions/ogcapi-processes-4/1.0/locked" @@ -1812,8 +1812,9 @@ def test_job_update_locked(self): def test_job_update_response(self): new_job = self.make_job( task_id=self.fully_qualified_test_name(), process=self.process_public.identifier, service=None, - status=Status.CREATED, progress=100, access=Visibility.PUBLIC, + status=Status.CREATED, progress=0, access=Visibility.PUBLIC, inputs={"test": "data"}, outputs={"test": {"transmissionMode": ExecuteTransmissionMode.VALUE}}, + subscribers={"successUri": "https://example.com/random"}, ) # check precondition job setup @@ -1837,30 +1838,62 @@ def test_job_update_response(self): body = { "inputs": {"test": "modified", "new": 
123}, "outputs": {"test": {"transmissionMode": ExecuteTransmissionMode.REFERENCE}}, + "subscribers": { + "successUri": "https://example.com/success", + "failedUri": "https://example.com/failed", + }, } headers = { "Accept": ContentType.APP_JSON, "Content-Type": ContentType.APP_JSON, "Prefer": f"return={ExecuteReturnPreference.REPRESENTATION}; wait=5", } - resp = self.app.patch(path, params=body, headers=headers) + resp = self.app.patch_json(path, params=body, headers=headers) assert resp.status_code == 204 # validate changes applied and resolved accordingly + path = f"/jobs/{new_job.id}/inputs" resp = self.app.get(path, headers=self.json_headers) assert resp.status_code == 200 assert resp.json["inputs"] == {"test": "modified", "new": 123} assert resp.json["outputs"] == {"test": {"transmissionMode": ExecuteTransmissionMode.REFERENCE}} + assert resp.json["subscribers"] == { + "successUri": "https://example.com/success", + "failedUri": "https://example.com/failed", + } assert resp.json["headers"] == { "Accept": None, "Accept-Language": None, "Content-Type": None, - "Prefer": f"return={ExecuteReturnPreference.MINIMAL}", + "Prefer": f"return={ExecuteReturnPreference.REPRESENTATION}; wait=5", "X-WPS-Output-Context": "public" } assert resp.json["mode"] == ExecuteMode.SYNC, "Should have been modified from 'wait' preference." assert resp.json["response"] == ExecuteResponse.RAW, "Should have been modified from 'return' preference." 
+ @pytest.mark.oap_part4 + def test_job_update_response_process_disallowed(self): + proc_id = self.fully_qualified_test_name() + process = WpsTestProcess(identifier=proc_id) + process = Process.from_wps(process) + process["processDescriptionURL"] = f"https://localhost/processes/{proc_id}" + self.process_store.save_process(process) + + new_job = self.make_job( + task_id=self.fully_qualified_test_name(), process=proc_id, service=None, + status=Status.CREATED, progress=0, access=Visibility.PUBLIC, + ) + + path = f"/jobs/{new_job.id}" + body = {"process": "https://localhost/processes/random"} + resp = self.app.patch_json(path, params=body, headers=self.json_headers, expect_errors=True) + assert resp.status_code == 400 + assert resp.json["cause"] == {"name": "process", "in": "body"} + assert resp.json["value"] == { + "body.process": "https://localhost/processes/random", + "job.process": f"https://localhost/processes/{proc_id}", + } + @pytest.mark.oap_part4 def test_job_status_alt_openeo_accept_response(self): """ diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index 2e4594173..70cff87da 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -15,6 +15,7 @@ HTTPAccepted, HTTPBadRequest, HTTPCreated, + HTTPException, HTTPNotAcceptable, HTTPUnprocessableEntity, HTTPUnsupportedMediaType @@ -28,6 +29,7 @@ ExecuteControlOption, ExecuteMode, parse_prefer_header_execute_mode, + parse_prefer_header_return, update_preference_applied_return_header ) from weaver.formats import AcceptLanguage, ContentType, clean_media_type_format, map_cwl_media_type, repr_json @@ -43,6 +45,7 @@ ows2json_output_data ) from weaver.processes.types import ProcessType +from weaver.processes.utils import get_process from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory, map_status from weaver.store.base import StoreJobs, StoreProcesses from weaver.utils import ( @@ -943,7 +946,101 @@ def update_job_parameters(job, request): body = 
validate_job_json(request) body = validate_job_schema(body, sd.PatchJobBodySchema) - raise NotImplementedError # FIXME: implement + value = field = loc = None + job_process = get_process(job.process) + try: + loc = "body" + if "process" in body: + # note: don't use 'get_process' for input process, as it might not even exist! + req_process_url = body["process"] + req_process_id = body["process"].rsplit("/processes/", 1)[-1] + if req_process_id != job_process.id or req_process_url != job_process.processDescriptionURL: + raise HTTPBadRequest( + json=sd.ErrorJsonResponseBodySchema(schema_include=True).deserialize({ + "type": "InvalidJobUpdate", + "title": "Invalid Job Execution Update", + "detail": "Update of the reference process for the job execution is not permitted.", + "status": HTTPBadRequest.code, + "cause": {"name": "process", "in": loc}, + "value": repr_json({ + "body.process": body["process"], + "job.process": job_process.processDescriptionURL, + }, force_string=False), + }) + ) + + for node in sd.PatchJobBodySchema().children: + field = node.name + if not field or field not in body: + continue + if field in ["subscribers", "notification_email"]: + continue # will be handled simultaneously after + + value = body[field] # type: ignore + if node.name in job: + setattr(job, field, value) + elif f"execution_{field}" in job: + field = f"execution_{field}" + if field == "execution_mode" and value in [ExecuteMode.ASYNC, ExecuteMode.SYNC]: + job_ctrl_exec = ExecuteControlOption.get(f"{value}-execute") + if job_ctrl_exec not in job_process.jobControlOptions: + raise HTTPBadRequest( + json=sd.ErrorJsonResponseBodySchema(schema_include=True).deserialize({ + "type": "InvalidJobUpdate", + "title": "Invalid Job Execution Update", + "detail": "Update of job execution mode is not permitted by process jobControlOptions.", + "status": HTTPBadRequest.code, + "cause": {"name": "mode", "in": loc}, + "value": repr_json( + { + "process.jobControlOptions": 
job_process.jobControlOptions, + "job.mode": job_ctrl_exec, + }, force_string=False + ), + }) + ) + + setattr(job, field, value) + + settings = get_settings(request) + subscribers = map_job_subscribers(body, settings=settings) + if not subscribers and body.get("subscribers") == {}: + subscribers = {} # asking to remove all subscribers explicitly + if subscribers is not None: + job.subscribers = subscribers + + # for both 'mode' and 'response' + # if provided both in body and corresponding 'Prefer' header parameter, the body parameter takes precedence + # however, if provided only in header, allow override of the body parameter considered as "higher priority" + loc = "header" + if "mode" not in body: + mode, wait, _ = parse_prefer_header_execute_mode(request.headers, job_process.jobControlOptions) + job.execution_mode = mode + job.execution_wait = wait + if "response" in body: + job_return = parse_prefer_header_return(request.headers) + if job_return: + job.execution_return = job_return + + except HTTPException: + raise + except ValueError as exc: + raise HTTPUnprocessableEntity( + json=sd.ErrorJsonResponseBodySchema(schema_include=True).deserialize({ + "type": "InvalidJobUpdate", + "title": "Invalid Job Execution Update", + "detail": "Could not update the job execution definition using specified parameters.", + "status": HTTPUnprocessableEntity.code, + "error": type(exc), + "cause": {"name": field, "in": loc}, + "value": repr_json(value, force_string=False), + }) + ) + + LOGGER.info("Updating %s", job) + db = get_db(request) + store = db.get_store(StoreJobs) + store.update_job(job) def validate_job_json(request): diff --git a/weaver/typedefs.py b/weaver/typedefs.py index 9ad88a300..dac897dc1 100644 --- a/weaver/typedefs.py +++ b/weaver/typedefs.py @@ -984,6 +984,7 @@ class CWL_SchemaName(Protocol): }, total=True) ProcessExecution = TypedDict("ProcessExecution", { + "process": NotRequired[str], "status": NotRequired[Literal["create"]], "mode": 
NotRequired[AnyExecuteMode], "response": NotRequired[AnyExecuteResponse], diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index bc3f644ac..00dcf7987 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -4210,6 +4210,13 @@ class Execute(ExecuteInputOutputs): "value": EXAMPLES["job_execute.json"], }, } + process = ProcessURL( + missing=drop, + description=( + "Process reference to be executed. " + "This parameter is required if the process cannot be inferred from the request endpoint." + ), + ) status = JobStatusCreate( description=( "Status to request creation of the job without submitting it to processing queue " @@ -6061,9 +6068,13 @@ class JobExecuteHeaders(ExtendedMappingSchema): class JobInputsBody(ExecuteInputOutputs): + # note: + # following definitions do not employ 'missing=drop' to explicitly indicate the fields + # this makes it easier to consider everything that could be implied when executing the job mode = JobExecuteModeEnum(default=ExecuteMode.AUTO) response = JobResponseOptionsEnum(default=None) headers = JobExecuteHeaders(missing={}) + subscribers = JobExecuteSubscribers(missing={}) links = LinkList(missing=drop) @@ -6584,13 +6595,25 @@ class PostProcessJobsEndpointXML(PostJobsEndpointXML, LocalProcessPath): class PatchJobBodySchema(Execute): - description = "Execution request parameters to be updated." + description = "Execution request contents to be updated." # all parameters that are not 'missing=drop' must be added to allow partial update inputs = ExecuteInputValues(missing=drop, description="Input values or references to be updated.") outputs = ExecuteOutputSpec(missing=drop, description="Output format and transmission mode to be updated.") class PatchJobEndpoint(JobPath): + summary = "Execution request parameters to be updated." + description = ( + "Execution request parameters to be updated. 
" + "If parameters are omitted, they will be left unmodified. " + "If provided, parameters will override existing definitions integrally. " + "Therefore, if only a partial update of certain nested elements in a mapping or list is desired, " + "all elements under the corresponding parameters must be resubmitted entirely with the applied changes. " + "In the case of certain parameters, equivalent definitions can cause conflicting definitions between " + "headers and contents " + f"(see for more details: {DOC_URL}/processes.html#execution-body and {DOC_URL}/processes.html#execution-mode). " + "To verify the resulting parameterization of any pending job, consider using the `GET /jobs/{jobId}/inputs`." + ) header = JobExecuteHeaders() querystring = LocalProcessQuery() body = PatchJobBodySchema() @@ -6895,11 +6918,6 @@ def __deepcopy__(self, *args, **kwargs): return GenericHTMLResponse(name=self.name, description=self.description, children=self.children) -class ErrorDetail(ExtendedMappingSchema): - code = ExtendedSchemaNode(Integer(), description="HTTP status code.", example=400) - status = ExtendedSchemaNode(String(), description="HTTP status detail.", example="400 Bad Request") - - class OWSErrorCode(ExtendedSchemaNode): schema_type = String example = "InvalidParameterValue" @@ -6918,6 +6936,18 @@ class OWSExceptionResponse(ExtendedMappingSchema): description="Specific description of the error.") +class ErrorDetail(ExtendedMappingSchema): + code = ExtendedSchemaNode(Integer(), description="HTTP status code.", example=400) + status = ExtendedSchemaNode(String(), description="HTTP status detail.", example="400 Bad Request") + + +class ErrorSource(OneOfKeywordSchema): + _one_of = [ + ExtendedSchemaNode(String(), description="Error name or description."), + ErrorDetail(description="Detailed error representation.") + ] + + class ErrorCause(OneOfKeywordSchema): _one_of = [ ExtendedSchemaNode(String(), description="Error message from exception or cause of failure."), @@ 
-6934,7 +6964,7 @@ class ErrorJsonResponseBodySchema(ExtendedMappingSchema): status = ExtendedSchemaNode(Integer(), description="Error status code.", example=500, missing=drop) cause = ErrorCause(missing=drop) value = ErrorCause(missing=drop) - error = ErrorDetail(missing=drop) + error = ErrorSource(missing=drop) instance = URI(missing=drop) exception = OWSExceptionResponse(missing=drop) From 5c061e8d8918bb4e86d5e41e2cdf51bd9baad7c2 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 21 Oct 2024 17:47:07 -0400 Subject: [PATCH 11/33] save 'job.execution_mode' explicitly with 'auto' to preserve auto-resolve via 'Prefer' header across pipeline --- tests/functional/test_celery.py | 4 +- tests/functional/test_cli.py | 2 +- tests/functional/test_wps_package.py | 9 ++- tests/test_execute.py | 6 +- tests/wps_restapi/test_jobs.py | 57 +++++++++++++++---- weaver/datatype.py | 11 ++-- weaver/execute.py | 39 +++++++++++-- weaver/processes/execution.py | 67 +++++++++++++++-------- weaver/store/base.py | 4 +- weaver/store/mongodb.py | 10 ++-- weaver/wps_restapi/colander_extras.py | 4 ++ weaver/wps_restapi/jobs/jobs.py | 18 +++--- weaver/wps_restapi/swagger_definitions.py | 14 +++-- 13 files changed, 169 insertions(+), 76 deletions(-) diff --git a/tests/functional/test_celery.py b/tests/functional/test_celery.py index 3cf6949f6..d705b8e75 100644 --- a/tests/functional/test_celery.py +++ b/tests/functional/test_celery.py @@ -52,10 +52,10 @@ def test_celery_registry_resolution(): job_store = get_db(settings).get_store("jobs") job1 = job_store.save_job( task_id="tmp", process="jsonarray2netcdf", inputs={"input": {"href": "http://random-dont-care.com/fake.json"}} - ) + ) job2 = job_store.save_job( task_id="tmp", process="jsonarray2netcdf", inputs={"input": {"href": "http://random-dont-care.com/fake.json"}} - ) + ) with contextlib.ExitStack() as stack: celery_mongo_broker = f"""mongodb://{settings["mongodb.host"]}:{settings["mongodb.port"]}/celery-test""" diff --git 
a/tests/functional/test_cli.py b/tests/functional/test_cli.py index d72997a40..955bdb634 100644 --- a/tests/functional/test_cli.py +++ b/tests/functional/test_cli.py @@ -772,7 +772,7 @@ def setUp(self): super(TestWeaverCLI, self).setUp() job = self.job_store.save_job( task_id="12345678-1111-2222-3333-111122223333", process="fake-process", access=Visibility.PUBLIC - ) + ) job.status = Status.SUCCEEDED self.test_job = self.job_store.update_job(job) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 29cf739c0..472270eb4 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -5521,26 +5521,25 @@ def test_execute_multi_output_response_document_mixed(self): }, } - @pytest.mark.oap_part4 def test_execute_mismatch_process(self): proc = "EchoResultsTester" p_id = self.fully_qualified_test_name(proc) body = self.retrieve_payload(proc, "deploy", local=True) self.deploy_process(body, process_id=p_id) + # use non-existing process to ensure this particular situation is handled as well + # a missing process reference must not cause an early "not-found" response proc = "random-other-process" proc_other = self.fully_qualified_test_name(proc) - body = self.retrieve_payload(proc, "deploy", local=True) - self.deploy_process(body, process_id=p_id) exec_content = { - "process": f"https://localhost/processes/{p_id}", + "process": f"https://localhost/processes/{proc_other}", "inputs": {"message": "test"} } with contextlib.ExitStack() as stack: for mock_exec in mocked_execute_celery(): stack.enter_context(mock_exec) - path = f"/processes/{proc_other}/execution" # mismatch on purpose + path = f"/processes/{p_id}/execution" # mismatch on purpose resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, data=exec_content, headers=self.json_headers, only_local=True) assert resp.status_code == 400, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" diff --git a/tests/test_execute.py 
b/tests/test_execute.py index 69f8685f0..fbcc587f8 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -20,12 +20,16 @@ for (_headers, _support, _expected), _extra in itertools.product( [ - # no mode + # no mode (API-wide default async) ({"Prefer": "respond-async, wait=4"}, [], (ExecuteMode.ASYNC, None, {})), # both modes supported (sync attempted upto max/specified wait time, unless async requested explicitly) + ({"Prefer": ""}, None, # explicit 'None' or omitting the parameter entirely means "any" mode + (ExecuteMode.SYNC, 10, {})), ({"Prefer": ""}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], (ExecuteMode.SYNC, 10, {})), + ({"Prefer": "respond-async"}, None, + (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), ({"Prefer": "respond-async"}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], (ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"})), ({"Prefer": "respond-async, wait=4"}, [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index 0a431d122..1a8c715d2 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -31,7 +31,13 @@ ) from weaver.compat import Version from weaver.datatype import Job, Process, Service -from weaver.execute import ExecuteMode, ExecuteResponse, ExecuteReturnPreference, ExecuteTransmissionMode +from weaver.execute import ( + ExecuteControlOption, + ExecuteMode, + ExecuteResponse, + ExecuteReturnPreference, + ExecuteTransmissionMode +) from weaver.formats import ContentType from weaver.notify import decrypt_email from weaver.processes.wps_testing import WpsTestProcess @@ -51,7 +57,7 @@ from typing import Any, Iterable, List, Optional, Tuple, Union from weaver.status import AnyStatusType - from weaver.typedefs import AnyLogLevel, JSON, Number, Statistics + from weaver.typedefs import AnyLogLevel, JobResults, JSON, Number, Statistics from weaver.visibility import 
AnyVisibility @@ -86,6 +92,9 @@ def setUp(self): self.process_public = WpsTestProcess(identifier="process-public") self.process_store.save_process(self.process_public) self.process_store.set_visibility(self.process_public.identifier, Visibility.PUBLIC) + proc_pub = self.process_store.fetch_by_id("process-public") + proc_pub["jobControlOptions"] = [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC] + self.process_store.save_process(proc_pub) self.process_private = WpsTestProcess(identifier="process-private") self.process_store.save_process(self.process_private) self.process_store.set_visibility(self.process_private.identifier, Visibility.PRIVATE) @@ -174,6 +183,7 @@ def make_job(self, exceptions=None, # type: Optional[List[JSON]] logs=None, # type: Optional[List[Union[str, Tuple[str, AnyLogLevel, AnyStatusType, Number]]]] statistics=None, # type: Optional[Statistics] + results=None, # type: Optional[JobResults] tags=None, # type: Optional[List[str]] add_info=True, # type: bool **job_params, # type: Any @@ -181,7 +191,7 @@ def make_job(self, if isinstance(created, str): created = date_parser.parse(created) job = self.job_store.save_job( - task_id=task_id, process=process, service=service, is_workflow=False, execute_async=True, user_id=user_id, + task_id=task_id, process=process, service=service, is_workflow=False, user_id=user_id, access=access, created=created, **job_params ) job.status = status @@ -201,6 +211,8 @@ def make_job(self, job.exceptions = exceptions if statistics is not None: job.statistics = statistics + if results is not None: + job.results = results if tags is not None: job.tags = tags job = self.job_store.update_job(job) @@ -1771,7 +1783,13 @@ def test_job_statistics_response(self): @pytest.mark.oap_part4 def test_job_inputs_response(self): - path = f"/jobs/{self.job_info[0].id}/inputs" + new_job = self.make_job( + task_id=self.fully_qualified_test_name(), process=self.process_public.identifier, service=None, + status=Status.RUNNING, progress=50, 
access=Visibility.PRIVATE, context="test/context", + inputs={"test": "data"}, outputs={"test": {"transmissionMode": ExecuteTransmissionMode.VALUE}}, + ) + + path = f"/jobs/{new_job.id}/inputs" resp = self.app.get(path, headers=self.json_headers) assert resp.status_code == 200 assert resp.json["inputs"] == {"test": "data"} @@ -1781,14 +1799,24 @@ def test_job_inputs_response(self): "Accept-Language": None, "Content-Type": None, "Prefer": f"return={ExecuteReturnPreference.MINIMAL}", - "X-WPS-Output-Context": "public" + "X-WPS-Output-Context": "test/context", } + assert resp.json["subscribers"] == {} assert resp.json["mode"] == ExecuteMode.ASYNC assert resp.json["response"] == ExecuteResponse.DOCUMENT @pytest.mark.oap_part4 def test_job_outputs_response(self): - raise NotImplementedError # FIXME + new_job = self.make_job( + task_id=self.fully_qualified_test_name(), process=self.process_public.identifier, service=None, + status=Status.SUCCEEDED, progress=100, access=Visibility.PRIVATE, context="test/context", + results=[{"id": "test", "value": "data"}], + ) + + path = f"/jobs/{new_job.id}/outputs" + resp = self.app.get(path, headers=self.json_headers) + assert resp.status_code == 200 + assert resp.json["outputs"] == {"test": {"value": "data"}} @pytest.mark.oap_part4 @pytest.mark.xfail(reason="CWL PROV not implemented (https://github.com/crim-ca/weaver/issues/673)") @@ -1830,7 +1858,7 @@ def test_job_update_response(self): "Prefer": f"return={ExecuteReturnPreference.MINIMAL}", "X-WPS-Output-Context": None, } - assert resp.json["mode"] == ExecuteMode.ASYNC + assert resp.json["mode"] == ExecuteMode.AUTO assert resp.json["response"] == ExecuteResponse.DOCUMENT # modify job definition @@ -1857,20 +1885,25 @@ def test_job_update_response(self): assert resp.status_code == 200 assert resp.json["inputs"] == {"test": "modified", "new": 123} assert resp.json["outputs"] == {"test": {"transmissionMode": ExecuteTransmissionMode.REFERENCE}} - assert resp.json["subscribers"] == { - 
"successUri": "https://example.com/success", - "failedUri": "https://example.com/failed", - } assert resp.json["headers"] == { "Accept": None, "Accept-Language": None, "Content-Type": None, "Prefer": f"return={ExecuteReturnPreference.REPRESENTATION}; wait=5", - "X-WPS-Output-Context": "public" + "X-WPS-Output-Context": None } assert resp.json["mode"] == ExecuteMode.SYNC, "Should have been modified from 'wait' preference." assert resp.json["response"] == ExecuteResponse.RAW, "Should have been modified from 'return' preference." + assert "subscribers" not in resp.json, "Subscribers must not be exposed due to potentially sensible data" + test_job = self.job_store.fetch_by_id(new_job.id) + assert test_job.subscribers == { + "callbacks": { + Status.SUCCEEDED: "https://example.com/success", + Status.FAILED: "https://example.com/failed", + } + } + @pytest.mark.oap_part4 def test_job_update_response_process_disallowed(self): proc_id = self.fully_qualified_test_name() diff --git a/weaver/datatype.py b/weaver/datatype.py index 571f97f10..212bcf2aa 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -111,6 +111,7 @@ ExecutionInputs, ExecutionOutputs, ExecutionSubscribers, + JobResults, JSON, Link, Metadata, @@ -1079,7 +1080,7 @@ def execute_sync(self): @property def execution_mode(self): # type: () -> AnyExecuteMode - return ExecuteMode.get(self.get("execution_mode"), ExecuteMode.ASYNC) + return ExecuteMode.get(self.get("execution_mode"), ExecuteMode.AUTO) @execution_mode.setter def execution_mode(self, mode): @@ -1103,7 +1104,7 @@ def execution_wait(self): @execution_wait.setter def execution_wait(self, wait): # type: (Optional[int]) -> None - if wait is not None or not isinstance(wait, int): + if not (wait is None or isinstance(wait, int)): raise ValueError(f"Invalid value for '{self.__name__}.execution_wait'. 
Must be None or an integer.") self["execution_wait"] = wait @@ -1248,13 +1249,13 @@ def statistics(self, stats): self["statistics"] = stats def _get_results(self): - # type: () -> List[Optional[Dict[str, JSON]]] + # type: () -> JobResults if self.get("results") is None: self["results"] = [] return dict.__getitem__(self, "results") def _set_results(self, results): - # type: (List[Optional[Dict[str, JSON]]]) -> None + # type: (JobResults) -> None if not isinstance(results, list): raise TypeError(f"Type 'list' is required for '{self.__name__}.results'") self["results"] = results @@ -1465,7 +1466,7 @@ def links(self, container=None, self_link=None): job_path = base_url + sd.job_service.path.format(job_id=self.id) job_exec = f"{job_url.rsplit('/', 1)[0]}/execution" job_list = base_url + sd.jobs_service.path - job_links = [ + job_links = [ # type: List[Link] {"href": job_url, "rel": "status", "title": "Job status."}, # OGC {"href": job_url, "rel": "monitor", "title": "Job monitoring location."}, # IANA {"href": get_path_kvp(job_path, f=OutputFormat.JSON), "type": ContentType.APP_JSON, diff --git a/weaver/execute.py b/weaver/execute.py index 01c0b7581..6ed38136d 100644 --- a/weaver/execute.py +++ b/weaver/execute.py @@ -119,6 +119,7 @@ def parse_prefer_header_execute_mode( header_container, # type: AnyHeadersContainer supported_modes=None, # type: Optional[List[AnyExecuteControlOption]] wait_max=10, # type: int + return_auto=False, # type: bool ): # type: (...) -> Tuple[AnyExecuteMode, Optional[int], HeadersType] """ Obtain execution preference if provided in request headers. @@ -141,6 +142,10 @@ def parse_prefer_header_execute_mode( :param wait_max: Maximum wait time enforced by the server. If requested wait time is greater, ``wait`` preference will not be applied and will fall back to asynchronous response. + :param return_auto: + If the resolution ends up being an "auto" selection, the auto-resolved mode, wait-time, etc. are returned + by default. 
Using this option, the "auto" mode will be explicitly returned instead, allowing a mixture of + execution mode to be "auto" handled at another time. This is mostly for reporting purposes. :return: Tuple of resolved execution mode, wait time if specified, and header of applied preferences if possible. Maximum wait time indicates duration until synchronous response should fall back to asynchronous response. @@ -148,8 +153,9 @@ def parse_prefer_header_execute_mode( """ prefer = get_header("prefer", header_container) - relevant_modes = {ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC} - supported_modes = list(set(supported_modes or []).intersection(relevant_modes)) + relevant_modes = [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC] # order important, async default + supported_modes = relevant_modes if supported_modes is None else supported_modes + supported_modes = [mode for mode in supported_modes if mode in relevant_modes] if not prefer: # /req/core/process-execute-default-execution-mode (A & B) @@ -191,7 +197,7 @@ def parse_prefer_header_execute_mode( LOGGER.info("Requested Prefer wait header too large (%ss > %ss), revert to async execution.", wait, wait_max) return ExecuteMode.ASYNC, None, {} - auto = ExecuteMode.ASYNC if "respond-async" in params else ExecuteMode.SYNC + auto = ExecuteMode.ASYNC if "respond-async" in params else ExecuteMode.AUTO applied_preferences = [] # /req/core/process-execute-auto-execution-mode (A & B) if len(supported_modes) == 1: @@ -199,7 +205,7 @@ def parse_prefer_header_execute_mode( # otherwise, server is allowed to discard preference since it cannot be honoured mode = ExecuteMode.ASYNC if supported_modes[0] == ExecuteControlOption.ASYNC else ExecuteMode.SYNC wait = None if mode == ExecuteMode.ASYNC else wait - if auto == mode: + if auto in [mode, ExecuteMode.AUTO]: if auto == ExecuteMode.ASYNC: applied_preferences.append("respond-async") if wait and "wait" in params: @@ -218,11 +224,36 @@ def 
parse_prefer_header_execute_mode( return ExecuteMode.ASYNC, None, {"Preference-Applied": "respond-async"} if wait and "wait" in params: return ExecuteMode.SYNC, wait, {"Preference-Applied": f"wait={wait}"} + if auto == ExecuteMode.AUTO and return_auto: + return ExecuteMode.AUTO, None, {} if wait: # default used, not a supplied preference return ExecuteMode.SYNC, wait, {} return ExecuteMode.ASYNC, None, {} +def rebuild_prefer_header(job): + # type: (Job) -> Optional[str] + """ + Rebuilds the expected ``Prefer`` header value from :term:`Job` parameters. + """ + def append_header(header_value, new_value): + # type: (str, str) -> str + if header_value and new_value: + header_value += "; " + header_value += new_value + return header_value + + header = "" + if job.execution_return: + header = append_header(header, f"return={job.execution_return}") + if job.execution_wait: + header = append_header(header, f"wait={job.execution_wait}") + if job.execute_async: + header = append_header(header, "respond-async") + + return header or None + + def update_preference_applied_return_header( job, # type: Job request_headers, # type: Optional[AnyHeadersContainer] diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index 70cff87da..96195673c 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -28,6 +28,8 @@ from weaver.execute import ( ExecuteControlOption, ExecuteMode, + ExecuteResponse, + ExecuteReturnPreference, parse_prefer_header_execute_mode, parse_prefer_header_return, update_preference_applied_return_header @@ -794,6 +796,7 @@ def submit_job_handler(payload, # type: ProcessExecution process = proc_store.fetch_by_id(process) if process and is_local: validate_process_io(process, json_body) + validate_process_id(process, json_body) else: LOGGER.warning( "Skipping validation of execution parameters for remote process [%s] on provider [%s]", @@ -809,11 +812,11 @@ def submit_job_handler(payload, # type: ProcessExecution 
exec_max_wait = as_int(exec_max_wait, default=20) mode, wait, applied = parse_prefer_header_execute_mode(headers, job_ctl_opts, exec_max_wait) if not applied: # whatever returned is a default, consider 'mode' in body as alternative - is_execute_async = ExecuteMode.get(json_body.get("mode")) != ExecuteMode.SYNC # convert auto to async + execute_mode = ExecuteMode.get(json_body.get("mode"), default=ExecuteMode.AUTO) else: # as per https://datatracker.ietf.org/doc/html/rfc7240#section-2 # Prefer header not resolved with a valid value should still resume without error - is_execute_async = mode != ExecuteMode.SYNC + execute_mode = mode accept_type = validate_job_accept_header(headers, mode) exec_resp, exec_return = get_job_return(job=None, body=json_body, headers=headers) # job 'None' since still parsing req_headers = copy.deepcopy(headers or {}) @@ -833,7 +836,7 @@ def submit_job_handler(payload, # type: ProcessExecution store = db.get_store(StoreJobs) # type: StoreJobs job = store.save_job(task_id=job_status, process=process, service=provider_id, status=job_status, inputs=job_inputs, outputs=job_outputs, is_workflow=is_workflow, is_local=is_local, - execute_async=is_execute_async, execute_wait=wait, + execute_mode=execute_mode, execute_wait=wait, execute_response=exec_resp, execute_return=exec_return, custom_tags=tags, user_id=user, access=visibility, context=context, subscribers=subscribers, accept_type=accept_type, accept_language=language) @@ -950,24 +953,7 @@ def update_job_parameters(job, request): job_process = get_process(job.process) try: loc = "body" - if "process" in body: - # note: don't use 'get_process' for input process, as it might not even exist! 
- req_process_url = body["process"] - req_process_id = body["process"].rsplit("/processes/", 1)[-1] - if req_process_id != job_process.id or req_process_url != job_process.processDescriptionURL: - raise HTTPBadRequest( - json=sd.ErrorJsonResponseBodySchema(schema_include=True).deserialize({ - "type": "InvalidJobUpdate", - "title": "Invalid Job Execution Update", - "detail": "Update of the reference process for the job execution is not permitted.", - "status": HTTPBadRequest.code, - "cause": {"name": "process", "in": loc}, - "value": repr_json({ - "body.process": body["process"], - "job.process": job_process.processDescriptionURL, - }, force_string=False), - }) - ) + validate_process_id(job_process, body) for node in sd.PatchJobBodySchema().children: field = node.name @@ -1013,14 +999,18 @@ def update_job_parameters(job, request): # if provided both in body and corresponding 'Prefer' header parameter, the body parameter takes precedence # however, if provided only in header, allow override of the body parameter considered as "higher priority" loc = "header" - if "mode" not in body: + if ExecuteMode.get(body.get("mode"), default=ExecuteMode.AUTO) == ExecuteMode.AUTO: mode, wait, _ = parse_prefer_header_execute_mode(request.headers, job_process.jobControlOptions) job.execution_mode = mode job.execution_wait = wait - if "response" in body: + if "response" not in body: job_return = parse_prefer_header_return(request.headers) if job_return: job.execution_return = job_return + if job_return == ExecuteReturnPreference.REPRESENTATION: + job.execution_response = ExecuteResponse.RAW + else: + job.execution_response = ExecuteResponse.DOCUMENT except HTTPException: raise @@ -1125,6 +1115,37 @@ def validate_job_accept_header(headers, execution_mode): ) +def validate_process_id(job_process, payload): + # type: (Process, ProcessExecution) -> None + """ + Validates that the specified ``process`` in the payload corresponds to the referenced :term:`Job` :term:`Process`. 
+ + :raises HTTPException: Corresponding error for detected invalid combination of process references. + """ + if "process" in payload: + # note: don't use 'get_process' for input process, as it might not even exist! + req_process_url = payload["process"] + req_process_id = payload["process"].rsplit("/processes/", 1)[-1] + if req_process_id != job_process.id or req_process_url != job_process.processDescriptionURL: + raise HTTPBadRequest( + json=sd.ErrorJsonResponseBodySchema(schema_include=True).deserialize( + { + "type": "InvalidJobUpdate", + "title": "Invalid Job Execution Update", + "detail": "Update of the reference process for the job execution is not permitted.", + "status": HTTPBadRequest.code, + "cause": {"name": "process", "in": "body"}, + "value": repr_json( + { + "body.process": payload["process"], + "job.process": job_process.processDescriptionURL, + }, force_string=False + ), + } + ) + ) + + def validate_process_io(process, payload): # type: (Process, ProcessExecution) -> None """ diff --git a/weaver/store/base.py b/weaver/store/base.py index 3ad9ab6a8..4e432e387 100644 --- a/weaver/store/base.py +++ b/weaver/store/base.py @@ -12,7 +12,7 @@ from pywps import Process as ProcessWPS from weaver.datatype import Bill, Job, Process, Quote, Service, VaultFile - from weaver.execute import AnyExecuteResponse, AnyExecuteReturnPreference + from weaver.execute import AnyExecuteMode, AnyExecuteResponse, AnyExecuteReturnPreference from weaver.sort import AnySortType from weaver.status import AnyStatusSearch, AnyStatusType from weaver.typedefs import ( @@ -174,7 +174,7 @@ def save_job(self, outputs=None, # type: Optional[ExecutionOutputs] is_workflow=False, # type: bool is_local=False, # type: bool - execute_async=True, # type: bool + execute_mode=None, # type: Optional[AnyExecuteMode] execute_wait=None, # type: Optional[int] execute_response=None, # type: Optional[AnyExecuteResponse] execute_return=None, # type: Optional[AnyExecuteReturnPreference] diff --git 
a/weaver/store/mongodb.py b/weaver/store/mongodb.py index c8b3b4ce0..9b3b9b512 100644 --- a/weaver/store/mongodb.py +++ b/weaver/store/mongodb.py @@ -63,7 +63,7 @@ from pymongo.collection import Collection - from weaver.execute import AnyExecuteResponse, AnyExecuteReturnPreference + from weaver.execute import AnyExecuteMode, AnyExecuteResponse, AnyExecuteReturnPreference from weaver.processes.types import AnyProcessType from weaver.sort import AnySortType from weaver.status import AnyStatusSearch, AnyStatusType @@ -790,7 +790,7 @@ def save_job(self, outputs=None, # type: Optional[ExecutionOutputs] is_workflow=False, # type: bool is_local=False, # type: bool - execute_async=True, # type: bool + execute_mode=None, # type: Optional[AnyExecuteMode] execute_wait=None, # type: Optional[int] execute_response=None, # type: Optional[AnyExecuteResponse] execute_return=None, # type: Optional[AnyExecuteReturnPreference] @@ -814,10 +814,10 @@ def save_job(self, tags.append(ProcessType.WORKFLOW) else: tags.append(ProcessType.APPLICATION) - if execute_async in [None, False] and execute_wait: + if execute_mode != ExecuteMode.ASYNC and execute_wait is not None: execute_mode = ExecuteMode.SYNC - else: - execute_mode = ExecuteMode.ASYNC + if execute_mode is None: + execute_mode = ExecuteMode.AUTO tags.append(execute_mode) if not access: access = Visibility.PRIVATE diff --git a/weaver/wps_restapi/colander_extras.py b/weaver/wps_restapi/colander_extras.py index 2c5f41b63..466737ef5 100644 --- a/weaver/wps_restapi/colander_extras.py +++ b/weaver/wps_restapi/colander_extras.py @@ -1422,6 +1422,10 @@ def _schema_deserialize(self, cstruct, schema_meta, schema_id): return cstruct if not getattr(self, SchemaRefMappingSchema._extension, False): return cstruct + # don't inject the schema meta/id if the mapping is empty + # this is to avoid creating a non-empty mapping, which often as a "special" meaning + if not cstruct: + return cstruct schema_result = {} schema_fields = [("schema", 
schema_meta), ("id", schema_id)] diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index 9bcb91ee8..79ba565a1 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -1,10 +1,9 @@ -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING from box import Box from celery.utils.log import get_task_logger from colander import Invalid from pyramid.httpexceptions import ( - HTTPAccepted, HTTPBadRequest, HTTPOk, HTTPNoContent, @@ -17,6 +16,7 @@ from weaver.database import get_db from weaver.datatype import Job from weaver.exceptions import JobNotFound, JobStatisticsNotFound, ProcessNotFound, log_unhandled_exceptions +from weaver.execute import parse_prefer_header_execute_mode, rebuild_prefer_header from weaver.formats import ( ContentType, OutputFormat, @@ -485,30 +485,28 @@ def get_job_inputs(request): Retrieve the inputs values and outputs definitions of a job. """ job = get_job(request) - schema = cast( - "JobInputsOutputsSchemaType", - get_schema_query(request.params.get("schema"), strict=False, default=JobInputsOutputsSchema.OGC) - ) + schema = get_schema_query(request.params.get("schema"), strict=False, default=JobInputsOutputsSchema.OGC) job_inputs = job.inputs job_outputs = job.outputs if job.is_local: process = get_process(job.process, request=request) job_inputs = mask_process_inputs(process.package, job_inputs) job_inputs = convert_input_values_schema(job_inputs, schema) - job_outputs = convert_output_params_schema(job_outputs, schema) + job_outputs = convert_output_params_schema(job_outputs, schema) # type: ignore + job_prefer = rebuild_prefer_header(job) + job_mode, _, _ = parse_prefer_header_execute_mode({"Prefer": job_prefer}, return_auto=True) job_headers = { "Accept": job.accept_type, "Accept-Language": job.accept_language, - "Prefer": f"return={job.execution_return}" if job.execution_return else None, + "Prefer": job_prefer, "X-WPS-Output-Context": job.context, } body = { - 
"mode": job.execution_mode, + "mode": job_mode, "response": job.execution_response, "inputs": job_inputs, "outputs": job_outputs, "headers": job_headers, - "subscribers": job.subscribers, "links": job.links(request, self_link="inputs"), } body = sd.JobInputsBody().deserialize(body) diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index 00dcf7987..6cc523560 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -2242,13 +2242,16 @@ class JobExecuteSubscribers(ExtendedMappingSchema): success_uri = URL( name="successUri", description="Location where to POST the job results on successful completion.", + # allow omitting against official schema to support partial use/update + # (see https://github.com/opengeospatial/ogcapi-processes/issues/460) + missing=drop, ) - failure_uri = URL( + failed_uri = URL( name="failedUri", description="Location where to POST the job status if it fails execution.", missing=drop, ) - started_uri = URL( + in_progress_uri = URL( name="inProgressUri", description="Location where to POST the job status once it starts execution.", missing=drop, @@ -2259,12 +2262,12 @@ class JobExecuteSubscribers(ExtendedMappingSchema): description="Email recipient to send a notification on successful job completion.", missing=drop, ) - failure_email = Email( + failed_email = Email( name="failedEmail", description="Email recipient to send a notification on failed job completion.", missing=drop, ) - started_email = Email( + in_progress_email = Email( name="inProgressEmail", description="Email recipient to send a notification of job status once it starts execution.", missing=drop, @@ -6073,8 +6076,7 @@ class JobInputsBody(ExecuteInputOutputs): # this makes it easier to consider everything that could be implied when executing the job mode = JobExecuteModeEnum(default=ExecuteMode.AUTO) response = JobResponseOptionsEnum(default=None) - headers = 
JobExecuteHeaders(missing={}) - subscribers = JobExecuteSubscribers(missing={}) + headers = JobExecuteHeaders(missing={}, default={}) links = LinkList(missing=drop) From 0ce6d2332578f2cf360f84e138a08bde985535b2 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 21 Oct 2024 18:22:00 -0400 Subject: [PATCH 12/33] test fixes from previous commit changes --- tests/test_utils.py | 3 ++- weaver/wps_restapi/colander_extras.py | 9 +++++---- weaver/wps_restapi/jobs/jobs.py | 2 +- weaver/xml_util.py | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 96269fd83..14281efdf 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -412,8 +412,9 @@ def test_map_status_compliant(compliance, status): def test_map_status_back_compatibility_and_special_cases(): - for c in StatusCompliant: + for c in (set(StatusCompliant.values()) - {StatusCompliant.OPENEO}): # type: ignore assert map_status("successful", c) == Status.SUCCEEDED + assert map_status("successful", StatusCompliant.OPENEO) == Status.FINISHED def test_map_status_pywps_compliant_as_int_statuses(): diff --git a/weaver/wps_restapi/colander_extras.py b/weaver/wps_restapi/colander_extras.py index 466737ef5..8c23337e2 100644 --- a/weaver/wps_restapi/colander_extras.py +++ b/weaver/wps_restapi/colander_extras.py @@ -1422,10 +1422,6 @@ def _schema_deserialize(self, cstruct, schema_meta, schema_id): return cstruct if not getattr(self, SchemaRefMappingSchema._extension, False): return cstruct - # don't inject the schema meta/id if the mapping is empty - # this is to avoid creating a non-empty mapping, which often as a "special" meaning - if not cstruct: - return cstruct schema_result = {} schema_fields = [("schema", schema_meta), ("id", schema_id)] @@ -1450,6 +1446,11 @@ def _deserialize_impl(self, cstruct): # pylint: disable=W0222,signature-differs """ Converts the data using validation against the :term:`JSON` schema definition. 
""" + # don't inject the schema meta/id if the mapping is empty + # this is to avoid creating a non-empty mapping, which often as a "special" meaning + # furthermore, when the mapping is empty, there is no data to ensuring this schema is actually applied + if not cstruct: + return cstruct # meta-schema always disabled in this context since irrelevant # refer to the "id" of the parent schema representing this data using "$schema" # this is not "official" JSON requirement, but very common in practice diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index 79ba565a1..c49d7bb30 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -238,7 +238,7 @@ def create_job(request): prov_id = prov_parts[-1] if len(prov_parts) > 1 else None elif ctype in ContentType.ANY_XML: body_xml = xml_util.fromstring(request.text) - proc_id = body_xml.xpath("ows:Identifier", namespaces=body_xml.getroot().nsmap)[0].text + proc_id = body_xml.xpath("ows:Identifier", namespaces=body_xml.getroottree().nsmap)[0].text except Exception as exc: raise ProcessNotFound(json={ "title": "NoSuchProcess", diff --git a/weaver/xml_util.py b/weaver/xml_util.py index 8e19991f9..b0605a281 100644 --- a/weaver/xml_util.py +++ b/weaver/xml_util.py @@ -53,7 +53,7 @@ def fromstring(text, parser=XML_PARSER): - # type: (AnyStr, lxml_etree.XMLParser) -> XMLTree + # type: (AnyStr, lxml_etree.XMLParser) -> XML from weaver.utils import str2bytes return _lxml_fromstring(str2bytes(text), parser=parser) # nosec: B410 From 459e8263163918bb45163616c520c10e53b96795 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 21 Oct 2024 18:39:23 -0400 Subject: [PATCH 13/33] fix invalid exception status handling of pywps --- tests/functional/test_builtin.py | 14 ++++++++------ weaver/status.py | 7 +++---- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/functional/test_builtin.py b/tests/functional/test_builtin.py index 1b69ce723..55186cb66 
100644 --- a/tests/functional/test_builtin.py +++ b/tests/functional/test_builtin.py @@ -21,8 +21,9 @@ from weaver.execute import ExecuteControlOption, ExecuteMode, ExecuteResponse, ExecuteTransmissionMode from weaver.formats import ContentEncoding, ContentType, get_format, repr_json from weaver.processes.builtin import file_index_selector, jsonarray2netcdf, metalink2netcdf, register_builtin_processes +from weaver.processes.constants import JobInputsOutputsSchema from weaver.status import Status -from weaver.utils import create_metalink, fully_qualified_name +from weaver.utils import create_metalink, fully_qualified_name, get_path_kvp from weaver.wps.utils import map_wps_output_location from weaver.wps_restapi import swagger_definitions as sd @@ -242,7 +243,7 @@ def test_jsonarray2netcdf_execute_async(self): assert resp.headers["Location"] == job_url results = self.monitor_job(job_url) - output_url = f"{job_url}/outputs" + output_url = get_path_kvp(f"{job_url}/outputs", schema=JobInputsOutputsSchema.OLD) resp = self.app.get(output_url, headers=self.json_headers) assert resp.status_code == 200, f"Error job outputs:\n{repr_json(resp.text, indent=2)}" outputs = resp.json @@ -288,7 +289,7 @@ def test_jsonarray2netcdf_execute_async_output_by_reference_response_document(se # even though results are requested by Link reference, # Weaver still offers them with document on outputs endpoint - output_url = f"{job_url}/outputs" + output_url = get_path_kvp(f"{job_url}/outputs", schema=JobInputsOutputsSchema.OLD) resp = self.app.get(output_url, headers=self.json_headers) assert resp.status_code == 200, f"Error job outputs:\n{resp.text}" outputs = resp.json @@ -332,7 +333,7 @@ def test_jsonarray2netcdf_execute_async_output_by_value_response_raw(self): # even though results are requested by raw data, # Weaver still offers them with document on outputs endpoint - output_url = f"{job_url}/outputs" + output_url = get_path_kvp(f"{job_url}/outputs", schema=JobInputsOutputsSchema.OLD) 
resp = self.app.get(output_url, headers=self.json_headers) assert resp.status_code == 200, f"Error job outputs:\n{resp.text}" outputs = resp.json @@ -374,7 +375,8 @@ def test_jsonarray2netcdf_execute_async_output_by_reference_response_raw(self): # even though results are requested by Link reference, # Weaver still offers them with document on outputs endpoint - resp = self.app.get(f"{job_url}/outputs", headers=self.json_headers) + output_url = get_path_kvp(f"{job_url}/outputs", schema=JobInputsOutputsSchema.OLD) + resp = self.app.get(output_url, headers=self.json_headers) assert resp.status_code == 200, f"Error job outputs:\n{repr_json(resp.text, indent=2)}" outputs = resp.json @@ -442,7 +444,7 @@ def test_jsonarray2netcdf_execute_sync(self): assert resp.content_type == ContentType.APP_JSON results = resp.json - output_url = f"{job_url}/outputs" + output_url = get_path_kvp(f"{job_url}/outputs", schema=JobInputsOutputsSchema.OLD) resp = self.app.get(output_url, headers=self.json_headers) assert resp.status_code == 200, f"Error job outputs:\n{repr_json(resp.text, indent=2)}" outputs = resp.json diff --git a/weaver/status.py b/weaver/status.py index 5016cfc99..1984ef935 100644 --- a/weaver/status.py +++ b/weaver/status.py @@ -62,8 +62,7 @@ class Status(Constants): Status.STARTED, # running Status.SUCCEEDED, Status.FAILED, - Status.PAUSED, - Status.EXCEPTION + Status.PAUSED ]), StatusCompliant.OWSLIB: frozenset([ Status.ACCEPTED, @@ -204,12 +203,12 @@ def map_status(wps_status, compliant=StatusCompliant.OGC): job_status = Status.SUCCEEDED elif compliant == StatusCompliant.PYWPS: - if job_status in Status.RUNNING: + if job_status in [Status.RUNNING]: job_status = Status.STARTED elif job_status in [Status.DISMISSED, Status.CANCELED]: job_status = Status.FAILED elif job_status in JOB_STATUS_CATEGORIES[StatusCategory.FAILED]: - job_status = Status.EXCEPTION + job_status = Status.FAILED elif job_status in JOB_STATUS_CATEGORIES[StatusCategory.PENDING]: job_status = 
Status.PAUSED elif job_status in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]: From ce3fc9b3a22f387f0e704f3a26b547e37592e28c Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 21 Oct 2024 19:46:00 -0400 Subject: [PATCH 14/33] fix /jobs/{jobId}/outputs handling against default behavior in tests --- .pylintrc | 1 + CHANGES.rst | 3 +++ tests/functional/test_builtin.py | 3 +++ tests/functional/utils.py | 5 +++-- tests/wps_restapi/test_providers.py | 2 -- weaver/status.py | 2 +- 6 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.pylintrc b/.pylintrc index d4d7d62d6..a99c2cf65 100644 --- a/.pylintrc +++ b/.pylintrc @@ -98,6 +98,7 @@ disable=C0111,missing-docstring, R0902,too-many-instance-attributes, R0904,too-many-public-methods, R0912,too-many-branches, + R0913,too-many-arguments, R0914,too-many-locals, R0915,too-many-statements, R0917,too-many-positional-arguments, diff --git a/CHANGES.rst b/CHANGES.rst index dc44e1667..92dbdb86c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -14,6 +14,9 @@ Changes: -------- - Add support of *OGC API - Processes - Part 4: Job Management* endpoints for `Job` creation and execution (fixes `#716 `_). +- Align ``GET /jobs/{jobID}/outputs`` with requirements of *OGC API - Processes - Part 4: Job Management* endpoints + such that omitting the ``schema`` query parameter will automatically apply the `OGC` mapping representation by + default. Previous behavior was to return whichever representation that was used by the internal `Process` interface. - Add support of ``response: raw`` execution request body parameter as alternative to ``response: document``, which allows directly returning the result contents or ``Link`` headers rather then embedding them in a `JSON` response (fixes `#376 `_). 
diff --git a/tests/functional/test_builtin.py b/tests/functional/test_builtin.py index 55186cb66..1b7e58c5d 100644 --- a/tests/functional/test_builtin.py +++ b/tests/functional/test_builtin.py @@ -266,6 +266,7 @@ def test_jsonarray2netcdf_execute_async_output_by_reference_response_document(se with contextlib.ExitStack() as stack_exec: body, nc_data = self.setup_jsonarray2netcdf_inputs(stack_exec) body.update({ + "mode": ExecuteMode.ASYNC, "response": ExecuteResponse.DOCUMENT, # by value/reference doesn't matter because of this "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.REFERENCE}], }) @@ -306,6 +307,7 @@ def test_jsonarray2netcdf_execute_async_output_by_value_response_raw(self): with contextlib.ExitStack() as stack_exec: body, nc_data = self.setup_jsonarray2netcdf_inputs(stack_exec) body.update({ + "mode": ExecuteMode.ASYNC, "response": ExecuteResponse.RAW, # by value/reference important here # NOTE: quantity of outputs important as well # since single output, content-type is directly that output (otherwise should be multipart) @@ -352,6 +354,7 @@ def test_jsonarray2netcdf_execute_async_output_by_reference_response_raw(self): with contextlib.ExitStack() as stack_exec: body, nc_data = self.setup_jsonarray2netcdf_inputs(stack_exec) body.update({ + "mode": ExecuteMode.ASYNC, "response": ExecuteResponse.RAW, # by value/reference important here "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.REFERENCE}], # Link header }) diff --git a/tests/functional/utils.py b/tests/functional/utils.py index d51aa172f..0e6f14d8c 100644 --- a/tests/functional/utils.py +++ b/tests/functional/utils.py @@ -31,7 +31,7 @@ from weaver.processes.constants import JobInputsOutputsSchema, ProcessSchema from weaver.processes.wps_package import get_application_requirement from weaver.status import Status -from weaver.utils import fully_qualified_name, get_weaver_url, load_file +from weaver.utils import fully_qualified_name, get_weaver_url, 
load_file, get_path_kvp from weaver.visibility import Visibility if TYPE_CHECKING: @@ -530,7 +530,8 @@ def check_job_status(_resp, running=False): return resp.json def get_outputs(self, status_url): - resp = self.app.get(f"{status_url}/outputs", headers=dict(self.json_headers)) + path = get_path_kvp(f"{status_url}/outputs", schema=JobInputsOutputsSchema.OLD) + resp = self.app.get(path, headers=dict(self.json_headers)) body = resp.json pretty = json.dumps(body, indent=2, ensure_ascii=False) assert resp.status_code == 200, f"Get outputs failed:\n{pretty}\n{self._try_get_logs(status_url)}" diff --git a/tests/wps_restapi/test_providers.py b/tests/wps_restapi/test_providers.py index 8fee30831..584987bc1 100644 --- a/tests/wps_restapi/test_providers.py +++ b/tests/wps_restapi/test_providers.py @@ -1,5 +1,3 @@ -import unittest - import owslib import pytest from pyramid.httpexceptions import HTTPNotFound diff --git a/weaver/status.py b/weaver/status.py index 1984ef935..b0005c896 100644 --- a/weaver/status.py +++ b/weaver/status.py @@ -164,7 +164,7 @@ class Status(Constants): ] -def map_status(wps_status, compliant=StatusCompliant.OGC): +def map_status(wps_status, compliant=StatusCompliant.OGC): # pylint: disable=R1260 # type: (AnyStatusType, StatusCompliant) -> StatusType """ Maps WPS execution statuses to between compatible values of different implementations. 
From 7ee94421542a56c2eff2cb210fa03b33888ac618 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 21 Oct 2024 19:54:50 -0400 Subject: [PATCH 15/33] fix test monitoring status logic --- tests/functional/test_builtin.py | 2 +- tests/functional/utils.py | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/tests/functional/test_builtin.py b/tests/functional/test_builtin.py index 1b7e58c5d..dd41a5270 100644 --- a/tests/functional/test_builtin.py +++ b/tests/functional/test_builtin.py @@ -210,7 +210,7 @@ def test_jsonarray2netcdf_execute_invalid_file_local(self): assert resp.status_code == 201 job_url = resp.json["location"] - job_res = self.monitor_job(job_url, expect_failed=True) + job_res = self.monitor_job(job_url, expect_failed=True, return_status=True) assert job_res["status"] == Status.FAILED job_logs = self.app.get(f"{job_url}/logs").json assert any("ValueError: Not a valid file URL reference" in log for log in job_logs) diff --git a/tests/functional/utils.py b/tests/functional/utils.py index 0e6f14d8c..8d9bdfe0b 100644 --- a/tests/functional/utils.py +++ b/tests/functional/utils.py @@ -494,20 +494,16 @@ def monitor_job(self, :return: result of the successful job, or the status body if requested. :raises AssertionError: when job fails or took too long to complete. 
""" - wait_for_status = wait_for_status or Status.SUCCEEDED + final_status = Status.FAILED if expect_failed else (wait_for_status or Status.SUCCEEDED) def check_job_status(_resp, running=False): # type: (AnyResponseType, bool) -> bool body = _resp.json pretty = json.dumps(body, indent=2, ensure_ascii=False) - if wait_for_status is None: - final_status = Status.FAILED if expect_failed else Status.SUCCEEDED - else: - final_status = wait_for_status statuses = [Status.ACCEPTED, Status.RUNNING, final_status] if running else [final_status] assert _resp.status_code == 200, f"Execution failed:\n{pretty}\n{self._try_get_logs(status_url)}" assert body["status"] in statuses, f"Error job info:\n{pretty}\n{self._try_get_logs(status_url)}" - return body["status"] in {wait_for_status, Status.SUCCEEDED, Status.FAILED} # break condition + return body["status"] in {final_status, Status.SUCCEEDED, Status.FAILED} # break condition time.sleep(1) # small delay to ensure process execution had a chance to start before monitoring left = timeout or self.monitor_timeout From 5e54a63bb1a52ec7d0a5dd30b50892c0acc51f59 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 21 Oct 2024 19:55:29 -0400 Subject: [PATCH 16/33] fix imports linting --- tests/functional/test_wps_package.py | 4 ++-- tests/functional/utils.py | 2 +- tests/wps_restapi/test_providers.py | 3 +-- weaver/processes/execution.py | 2 +- weaver/wps_restapi/jobs/jobs.py | 2 +- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 472270eb4..033dabda1 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -88,8 +88,8 @@ CWL_RequirementsDict, JSON, Number, - ProcessOfferingMapping, - ProcessOfferingListing + ProcessOfferingListing, + ProcessOfferingMapping ) EDAM_PLAIN = f"{EDAM_NAMESPACE}:{EDAM_MAPPING[ContentType.TEXT_PLAIN]}" diff --git a/tests/functional/utils.py 
b/tests/functional/utils.py index 8d9bdfe0b..45f3e50de 100644 --- a/tests/functional/utils.py +++ b/tests/functional/utils.py @@ -31,7 +31,7 @@ from weaver.processes.constants import JobInputsOutputsSchema, ProcessSchema from weaver.processes.wps_package import get_application_requirement from weaver.status import Status -from weaver.utils import fully_qualified_name, get_weaver_url, load_file, get_path_kvp +from weaver.utils import fully_qualified_name, get_path_kvp, get_weaver_url, load_file from weaver.visibility import Visibility if TYPE_CHECKING: diff --git a/tests/wps_restapi/test_providers.py b/tests/wps_restapi/test_providers.py index 584987bc1..f3f1d248d 100644 --- a/tests/wps_restapi/test_providers.py +++ b/tests/wps_restapi/test_providers.py @@ -3,6 +3,7 @@ from pyramid.httpexceptions import HTTPNotFound from tests import resources +from tests.functional.utils import GenericUtils from tests.utils import ( get_test_weaver_app, mocked_remote_server_requests_wps1, @@ -18,8 +19,6 @@ from weaver.formats import ContentType from weaver.processes.constants import ProcessSchema -from tests.functional.utils import GenericUtils - class WpsProviderBase(GenericUtils): remote_provider_name = None diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index 96195673c..71fcfc528 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -104,8 +104,8 @@ AnyResponseType, AnyServiceRef, AnySettingsContainer, - AnyViewResponse, AnyValueType, + AnyViewResponse, CeleryResult, HeaderCookiesType, HeadersType, diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index c49d7bb30..40e6982cd 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -5,8 +5,8 @@ from colander import Invalid from pyramid.httpexceptions import ( HTTPBadRequest, - HTTPOk, HTTPNoContent, + HTTPOk, HTTPPermanentRedirect, HTTPUnprocessableEntity, HTTPUnsupportedMediaType From 
9eba01ff77be29b5f47c06bb07b7761290bcc014 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 21 Oct 2024 20:44:33 -0400 Subject: [PATCH 17/33] auto-resolve sync/async with max-wait --- CHANGES.rst | 10 ++++++++++ tests/functional/test_workflow.py | 3 ++- tests/wps_restapi/test_jobs.py | 3 ++- weaver/datatype.py | 2 +- weaver/execute.py | 3 ++- weaver/processes/execution.py | 5 ++++- weaver/store/mongodb.py | 2 -- 7 files changed, 21 insertions(+), 7 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 92dbdb86c..8962811e5 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -14,6 +14,16 @@ Changes: -------- - Add support of *OGC API - Processes - Part 4: Job Management* endpoints for `Job` creation and execution (fixes `#716 `_). +- Add ``headers``, ``mode`` and ``response`` parameters along the ``inputs`` and ``outputs`` returned by + the ``GET /jobs/{jobID}/inputs`` endpoint to better describe the expected resolution strategy of the + multiple `Job` execution options according to submitted request parameters. +- Increase flexible auto-resolution of *synchronous* vs *asynchronous* `Job` execution when no explicit strategy + is specified by ``mode`` body parameter or ``Prefer`` header. Situations where such flexible resolution can occur + will be reflected by a ``mode: auto`` and the absence of ``wait``/``respond-async`` in the ``Prefer`` header + within the response of the ``GET /jobs/{jobID}/inputs`` endpoint. +- Add support "on-trigger" `Job` submission using the ``status: create`` request body parameter. + Such a `Job` will be pending, and can be modified by ``PATCH /jobs/{jobID}`` requests, until execution is triggered + by a subsequent ``POST /jobs/{jobID}/results`` request. - Align ``GET /jobs/{jobID}/outputs`` with requirements of *OGC API - Processes - Part 4: Job Management* endpoints such that omitting the ``schema`` query parameter will automatically apply the `OGC` mapping representation by default. 
Previous behavior was to return whichever representation that was used by the internal `Process` interface. diff --git a/tests/functional/test_workflow.py b/tests/functional/test_workflow.py index c0ef941d9..5b9d12bca 100644 --- a/tests/functional/test_workflow.py +++ b/tests/functional/test_workflow.py @@ -37,7 +37,7 @@ ) from weaver import WEAVER_ROOT_DIR from weaver.config import WeaverConfiguration -from weaver.execute import ExecuteResponse, ExecuteReturnPreference, ExecuteTransmissionMode +from weaver.execute import ExecuteMode, ExecuteResponse, ExecuteReturnPreference, ExecuteTransmissionMode from weaver.formats import ContentType from weaver.processes.constants import ( CWL_REQUIREMENT_MULTIPLE_INPUT, @@ -919,6 +919,7 @@ def workflow_runner( # execute workflow execute_body = override_execute_body or workflow_info.execute_payload + execute_body.setdefault("mode", ExecuteMode.ASYNC) execute_path = f"{process_path}/jobs" self.assert_test(lambda: execute_body is not None, message="Cannot execute workflow without a request body!") diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index 1a8c715d2..a01efd403 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -1801,10 +1801,11 @@ def test_job_inputs_response(self): "Prefer": f"return={ExecuteReturnPreference.MINIMAL}", "X-WPS-Output-Context": "test/context", } - assert resp.json["subscribers"] == {} assert resp.json["mode"] == ExecuteMode.ASYNC assert resp.json["response"] == ExecuteResponse.DOCUMENT + assert "subscribers" not in resp.json, "Subscribers must not be exposed due to potentially sensible data" + @pytest.mark.oap_part4 def test_job_outputs_response(self): new_job = self.make_job( diff --git a/weaver/datatype.py b/weaver/datatype.py index 212bcf2aa..6f20db7f6 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -1097,7 +1097,7 @@ def execution_wait(self): """ Execution time (in seconds) to wait for a synchronous response.
""" - if not self.execute_sync: + if self.execute_async: return None return self.get("execution_wait") diff --git a/weaver/execute.py b/weaver/execute.py index 6ed38136d..0d0cae1f8 100644 --- a/weaver/execute.py +++ b/weaver/execute.py @@ -166,7 +166,8 @@ def parse_prefer_header_execute_mode( wait = None if mode == ExecuteMode.ASYNC else wait_max return mode, wait, {} # /req/core/process-execute-default-execution-mode (C) - return ExecuteMode.SYNC, wait_max, {} + mode = ExecuteMode.AUTO if return_auto else ExecuteMode.SYNC + return mode, wait_max, {} # allow both listing of multiple 'Prefer' headers and single 'Prefer' header with multi-param ';' separated params = parse_kvp(prefer.replace(";", ","), pair_sep=",", multi_value_sep=None) diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index 71fcfc528..f5ab46ccf 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -810,7 +810,7 @@ def submit_job_handler(payload, # type: ProcessExecution job_ctl_opts = ExecuteControlOption.values() exec_max_wait = settings.get("weaver.execute_sync_max_wait", settings.get("weaver.exec_sync_max_wait")) exec_max_wait = as_int(exec_max_wait, default=20) - mode, wait, applied = parse_prefer_header_execute_mode(headers, job_ctl_opts, exec_max_wait) + mode, wait, applied = parse_prefer_header_execute_mode(headers, job_ctl_opts, exec_max_wait, return_auto=True) if not applied: # whatever returned is a default, consider 'mode' in body as alternative execute_mode = ExecuteMode.get(json_body.get("mode"), default=ExecuteMode.AUTO) else: @@ -1120,6 +1120,9 @@ def validate_process_id(job_process, payload): """ Validates that the specified ``process`` in the payload corresponds to the referenced :term:`Job` :term:`Process`. + If no ``process`` is specified, no check is performed.
The :term:`Job` is assumed to have pre-validated that + the :term:`Process` is appropriate from another reference, such as using the ID from the path or a query parameter. + :raises HTTPException: Corresponding error for detected invalid combination of process references. """ if "process" in payload: diff --git a/weaver/store/mongodb.py b/weaver/store/mongodb.py index 9b3b9b512..ad70d01a8 100644 --- a/weaver/store/mongodb.py +++ b/weaver/store/mongodb.py @@ -814,8 +814,6 @@ def save_job(self, tags.append(ProcessType.WORKFLOW) else: tags.append(ProcessType.APPLICATION) - if execute_mode != ExecuteMode.ASYNC and execute_wait is not None: - execute_mode = ExecuteMode.SYNC if execute_mode is None: execute_mode = ExecuteMode.AUTO tags.append(execute_mode) From 9b17af6031e4482bbf7f7da371260d872c1649df Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 22 Oct 2024 11:14:31 -0400 Subject: [PATCH 18/33] add schema query / profile content-type for openeo job status response --- setup.cfg | 1 + tests/wps_restapi/test_jobs.py | 2 + weaver/processes/constants.py | 11 ++++++ weaver/processes/convert.py | 12 +++++- weaver/status.py | 4 +- weaver/wps_restapi/jobs/jobs.py | 21 +++++++---- weaver/wps_restapi/jobs/utils.py | 46 ++++++++++++++++++++--- weaver/wps_restapi/swagger_definitions.py | 21 ++++++++++- 8 files changed, 99 insertions(+), 19 deletions(-) diff --git a/setup.cfg b/setup.cfg index 9987df2a1..35c398a31 100644 --- a/setup.cfg +++ b/setup.cfg @@ -65,6 +65,7 @@ markers = oap_part2: mark test as 'OGC API - Processes - Part 2: Deploy, Replace, Undeploy (DRU)' functionalities oap_part3: mark test as 'OGC API - Processes - Part 3: Workflows and Chaining' functionalities oap_part4: mark test as 'OGC API - Processes - Part 4: Job Management' functionalities + openeo: mark test as evaluating 'openEO' functionalities filterwarnings = ignore:No file specified for WPS-1 providers registration:RuntimeWarning ignore:.*configuration 
setting.*weaver\.cwl_processes_dir.*:RuntimeWarning diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index a01efd403..d6a3627e7 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -1929,6 +1929,7 @@ def test_job_update_response_process_disallowed(self): } @pytest.mark.oap_part4 + @pytest.mark.openeo def test_job_status_alt_openeo_accept_response(self): """ Validate retrieval of :term:`Job` status response with alternate value mapping by ``Accept`` header. @@ -1967,6 +1968,7 @@ def test_job_status_alt_openeo_accept_response(self): assert resp.json["status"] == Status.QUEUED @pytest.mark.oap_part4 + @pytest.mark.openeo def test_job_status_alt_openeo_profile_response(self): """ Validate retrieval of :term:`Job` status response with alternate value mapping by ``profile`` query parameter. diff --git a/weaver/processes/constants.py b/weaver/processes/constants.py index 827ed4c3f..263a25da0 100644 --- a/weaver/processes/constants.py +++ b/weaver/processes/constants.py @@ -365,6 +365,9 @@ class OpenSearchField(Constants): JobInputsOutputsSchemaAnyOGCType = Union[JobInputsOutputsSchemaType_OGC, JobInputsOutputsSchemaType_OGC_STRICT] JobInputsOutputsSchemaAnyOLDType = Union[JobInputsOutputsSchemaType_OLD, JobInputsOutputsSchemaType_OLD_STRICT] JobInputsOutputsSchemaType = Union[JobInputsOutputsSchemaAnyOGCType, JobInputsOutputsSchemaAnyOLDType] +JobStatusSchemaType_OGC = Literal["OGC", "ogc"] +JobStatusSchemaType_OpenEO = Literal["OPENEO", "openeo", "openEO", "OpenEO"] +JobStatusSchemaType = Union[JobStatusSchemaType_OGC, JobStatusSchemaType_OpenEO] class ProcessSchema(Constants): @@ -386,6 +389,14 @@ class JobInputsOutputsSchema(Constants): OLD = "old" # type: JobInputsOutputsSchemaType_OLD +class JobStatusSchema(Constants): + """ + Schema selector to represent a :term:`Job` status response. 
+ """ + OGC = "ogc" # type: JobStatusSchemaType_OGC + OPENEO = "openeo" # type: JobStatusSchemaType_OpenEO + + if TYPE_CHECKING: # pylint: disable=invalid-name CWL_RequirementNames = Literal[ diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index 2810635cc..ed0f8c297 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -1911,17 +1911,25 @@ def convert_input_values_schema(inputs, schema): @overload -def convert_output_params_schema(inputs, schema): +def convert_output_params_schema(outputs, schema): # type: (Optional[ExecutionOutputs], JobInputsOutputsSchemaAnyOGCType) -> Optional[ExecutionOutputsMap] ... @overload -def convert_output_params_schema(inputs, schema): +def convert_output_params_schema(outputs, schema): # type: (Optional[ExecutionOutputs], JobInputsOutputsSchemaAnyOLDType) -> Optional[ExecutionOutputsList] ... +# FIXME: workaround typing duplicate +# (https://youtrack.jetbrains.com/issue/PY-76786/Typing-literal-with-overload-fails-to-consider-non-overloaded-type) +@overload +def convert_output_params_schema(outputs, schema): + # type: (Optional[ExecutionOutputs], JobInputsOutputsSchemaType) -> Optional[ExecutionOutputs] + ... 
+ + def convert_output_params_schema(outputs, schema): # type: (Optional[ExecutionOutputs], JobInputsOutputsSchemaType) -> Optional[ExecutionOutputs] """ diff --git a/weaver/status.py b/weaver/status.py index b0005c896..6ecf64ca9 100644 --- a/weaver/status.py +++ b/weaver/status.py @@ -52,9 +52,9 @@ class Status(Constants): Status.CREATED, # Part 4: Job Management Status.ACCEPTED, Status.RUNNING, - Status.SUCCEEDED, # old (keep it because it matches existing ADES/EMS and other providers) + Status.SUCCEEDED, # new Status.FAILED, - Status.SUCCESSFUL, # new + Status.SUCCESSFUL, # old (keep it because it matches existing ADES/EMS and other providers) Status.DISMISSED # new ]), StatusCompliant.PYWPS: frozenset([ diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index 40e6982cd..dfafa8c44 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -1,3 +1,4 @@ +from moto.batch.utils import JobStatus from typing import TYPE_CHECKING from box import Box @@ -25,7 +26,7 @@ guess_target_format, repr_json ) -from weaver.processes.constants import JobInputsOutputsSchema +from weaver.processes.constants import JobInputsOutputsSchema, JobStatusSchema from weaver.processes.convert import convert_input_values_schema, convert_output_params_schema from weaver.processes.execution import ( submit_job, @@ -35,7 +36,7 @@ ) from weaver.processes.utils import get_process from weaver.processes.wps_package import mask_process_inputs -from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory +from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory, StatusCompliant, map_status from weaver.store.base import StoreJobs from weaver.utils import get_header, get_settings, make_link_header from weaver.wps_restapi import swagger_definitions as sd @@ -45,7 +46,8 @@ get_job_list_links, get_job_results_response, get_results, - get_schema_query, + get_job_io_schema_query, + get_job_status_schema, 
raise_job_bad_status_locked, raise_job_bad_status_success, raise_job_dismissed, @@ -320,8 +322,11 @@ def get_job_status(request): Retrieve the status of a job. """ job = get_job(request) - job_status = job.json(request) - return HTTPOk(json=job_status) + job_body = job.json(request) + schema = get_job_status_schema(request) + if schema == JobStatusSchema.OPENEO: + job_body["status"] = map_status(job_body["status"], StatusCompliant.OPENEO) + return HTTPOk(json=job_body) @sd.provider_job_service.patch( @@ -485,14 +490,14 @@ def get_job_inputs(request): Retrieve the inputs values and outputs definitions of a job. """ job = get_job(request) - schema = get_schema_query(request.params.get("schema"), strict=False, default=JobInputsOutputsSchema.OGC) + schema = get_job_io_schema_query(request.params.get("schema"), strict=False, default=JobInputsOutputsSchema.OGC) job_inputs = job.inputs job_outputs = job.outputs if job.is_local: process = get_process(job.process, request=request) job_inputs = mask_process_inputs(process.package, job_inputs) job_inputs = convert_input_values_schema(job_inputs, schema) - job_outputs = convert_output_params_schema(job_outputs, schema) # type: ignore + job_outputs = convert_output_params_schema(job_outputs, schema) job_prefer = rebuild_prefer_header(job) job_mode, _, _ = parse_prefer_header_execute_mode({"Prefer": job_prefer}, return_auto=True) job_headers = { @@ -543,7 +548,7 @@ def get_job_outputs(request): job = get_job(request) raise_job_dismissed(job, request) raise_job_bad_status_success(job, request) - schema = get_schema_query(request.params.get("schema"), default=JobInputsOutputsSchema.OGC) + schema = get_job_io_schema_query(request.params.get("schema"), default=JobInputsOutputsSchema.OGC) results, _ = get_results(job, request, schema=schema, link_references=False) outputs = {"outputs": results} outputs.update({"links": job.links(request, self_link="outputs")}) diff --git a/weaver/wps_restapi/jobs/utils.py 
b/weaver/wps_restapi/jobs/utils.py index 0bd870651..d8d11070a 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -3,7 +3,7 @@ import os import shutil from copy import deepcopy -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING, cast, overload import colander from celery.utils.log import get_task_logger @@ -42,7 +42,7 @@ ) from weaver.formats import ContentEncoding, ContentType, get_format, repr_json from weaver.owsexceptions import OWSNoApplicableCode, OWSNotFound -from weaver.processes.constants import JobInputsOutputsSchema +from weaver.processes.constants import JobInputsOutputsSchema, JobStatusSchema from weaver.processes.convert import any2wps_literal_datatype, convert_output_params_schema, get_field from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory, map_status from weaver.store.base import StoreJobs, StoreProcesses, StoreServices @@ -54,12 +54,14 @@ get_header, get_href_headers, get_path_kvp, + get_request_args, get_sane_name, get_secure_path, get_settings, get_weaver_url, is_uuid, - make_link_header + make_link_header, + parse_kvp ) from weaver.visibility import Visibility from weaver.wps.utils import get_wps_output_dir, get_wps_output_url, map_wps_output_location @@ -72,7 +74,7 @@ from weaver.execute import AnyExecuteResponse, AnyExecuteReturnPreference, AnyExecuteTransmissionMode from weaver.formats import AnyContentEncoding - from weaver.processes.constants import JobInputsOutputsSchemaType + from weaver.processes.constants import JobInputsOutputsSchemaType, JobStatusSchemaType from weaver.typedefs import ( AnyDataStream, AnyHeadersContainer, @@ -280,8 +282,17 @@ def get_job_list_links(job_total, filters, request): return links -def get_schema_query( - schema, # type: Optional[JobInputsOutputsSchemaType] +@overload +def get_job_io_schema_query( + schema, # type: Optional[str] + strict=True, # type: bool + default=None, # type: JobInputsOutputsSchemaType +): # type: (...) 
-> JobInputsOutputsSchemaType + ... + + +def get_job_io_schema_query( + schema, # type: Optional[str] strict=True, # type: bool default=None, # type: Optional[JobInputsOutputsSchemaType] ): # type: (...) -> Optional[JobInputsOutputsSchemaType] @@ -305,6 +316,29 @@ def get_schema_query( return schema_checked +def get_job_status_schema(request): + # type: (AnyRequestType) -> JobStatusSchemaType + """ + Identifies if a :term:`Job` status response schema applies for the request. + """ + params = get_request_args(request) + schema = JobStatusSchema.get(params.get("schema")) + if schema: + return schema + ctype = get_header("Content-Type", request.headers) + if not ctype: + return JobStatusSchema.OGC + params = parse_kvp(ctype) + profile = params.get("profile") + if not profile: + return JobStatusSchema.OGC + schema = cast( + "JobStatusSchemaType", + JobStatusSchema.get(profile, default=JobStatusSchema.OGC) + ) + return schema + + def make_result_link( job, # type: Job result, # type: ExecutionResultValue diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index 6cc523560..a187c6fbd 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -102,6 +102,7 @@ PACKAGE_TYPE_POSSIBLE_VALUES, WPS_LITERAL_DATA_TYPES, JobInputsOutputsSchema, + JobStatusSchema, ProcessSchema ) from weaver.quotation.status import QuoteStatus @@ -3264,12 +3265,26 @@ class ProcessVisibilityPutEndpoint(LocalProcessPath): body = VisibilitySchema() +class GetJobQuery(ExtendedMappingSchema): + schema = ExtendedSchemaNode( + String(), + title="JobStatusQuerySchema", + example=JobStatusSchema.OGC, + default=JobStatusSchema.OGC, + validator=OneOfCaseInsensitive(JobStatusSchema.values()), + summary="Job status schema representation.", + description="Selects the schema employed for representation of returned job status response.", + ) + + class GetProviderJobEndpoint(ProviderProcessPath, JobPath): header = 
RequestHeaders() + querystring = GetJobQuery() class GetJobEndpoint(JobPath): header = RequestHeaders() + querystring = GetJobQuery() class ProcessInputsEndpoint(LocalProcessPath, JobPath): @@ -6717,9 +6732,13 @@ class DeleteProviderJobsEndpoint(DeleteJobsEndpoint, ProviderProcessPath): pass +class GetProcessJobQuery(LocalProcessQuery, GetJobQuery): + pass + + class GetProcessJobEndpoint(LocalProcessPath): header = RequestHeaders() - querystring = LocalProcessQuery() + querystring = GetProcessJobQuery() class DeleteJobEndpoint(JobPath): From f4f1187f6b563c6cb34e956d9adee2bbcefd1818 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 22 Oct 2024 14:40:43 -0400 Subject: [PATCH 19/33] add job status schema from query param or accept profile --- tests/wps_restapi/test_jobs.py | 48 +++++++++++++---------- weaver/typedefs.py | 3 ++ weaver/wps_restapi/jobs/jobs.py | 19 ++++++--- weaver/wps_restapi/jobs/utils.py | 33 +++++++++++----- weaver/wps_restapi/patches.py | 40 +++++++++++++++++-- weaver/wps_restapi/swagger_definitions.py | 27 +++++++------ 6 files changed, 122 insertions(+), 48 deletions(-) diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index d6a3627e7..1fcb29a31 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -1934,33 +1934,37 @@ def test_job_status_alt_openeo_accept_response(self): """ Validate retrieval of :term:`Job` status response with alternate value mapping by ``Accept`` header. """ - assert self.job_info[0].status == Status.SUCCEEDED, "Precondition invalid." + job = self.job_info[0] + assert job.status == Status.SUCCEEDED, "Precondition invalid." 
headers = {"Accept": "application/json; profile=openeo"} - path = f"/jobs/{self.job_info[0].id}" + path = f"/jobs/{job.id}" resp = self.app.get(path, headers=headers) assert resp.status_code == 200 assert resp.headers["Content-Type"] == "application/json; profile=openeo" assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL assert resp.json["status"] == Status.FINISHED - assert self.job_info[0].status == Status.FAILED, "Precondition invalid." - path = f"/jobs/{self.job_info[1].id}" + job = self.job_info[1] + assert job.status == Status.FAILED, "Precondition invalid." + path = f"/jobs/{job.id}" resp = self.app.get(path, headers=headers) assert resp.status_code == 200 assert resp.headers["Content-Type"] == "application/json; profile=openeo" assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL assert resp.json["status"] == Status.ERROR - assert self.job_info[9].status == Status.RUNNING, "Precondition invalid." - path = f"/jobs/{self.job_info[1].id}" + job = self.job_info[9] + assert job.status == Status.RUNNING, "Precondition invalid." + path = f"/jobs/{job.id}" resp = self.app.get(path, headers=headers) assert resp.status_code == 200 assert resp.headers["Content-Type"] == "application/json; profile=openeo" assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL assert resp.json["status"] == Status.RUNNING - assert self.job_info[11].status == Status.ACCEPTED, "Precondition invalid." - path = f"/jobs/{self.job_info[1].id}" + job = self.job_info[11] + assert job.status == Status.ACCEPTED, "Precondition invalid." + path = f"/jobs/{job.id}" resp = self.app.get(path, headers=headers) assert resp.status_code == 200 assert resp.headers["Content-Type"] == "application/json; profile=openeo" @@ -1973,33 +1977,37 @@ def test_job_status_alt_openeo_profile_response(self): """ Validate retrieval of :term:`Job` status response with alternate value mapping by ``profile`` query parameter. 
""" - assert self.job_info[0].status == Status.SUCCEEDED, "Precondition invalid." - path = f"/jobs/{self.job_info[0].id}" - resp = self.app.get(path, headers=self.json_headers, params={"profile": "openeo"}) + job = self.job_info[0] + assert job.status == Status.SUCCEEDED, "Precondition invalid." + path = f"/jobs/{job.id}" + resp = self.app.get(path, headers=self.json_headers, params={"schema": "openeo"}) assert resp.status_code == 200 assert resp.headers["Content-Type"] == "application/json; profile=openeo" assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL assert resp.json["status"] == Status.FINISHED - assert self.job_info[0].status == Status.FAILED, "Precondition invalid." - path = f"/jobs/{self.job_info[1].id}" - resp = self.app.get(path, headers=self.json_headers, params={"profile": "openeo"}) + job = self.job_info[1] + assert job.status == Status.FAILED, "Precondition invalid." + path = f"/jobs/{job.id}" + resp = self.app.get(path, headers=self.json_headers, params={"schema": "openeo"}) assert resp.status_code == 200 assert resp.headers["Content-Type"] == "application/json; profile=openeo" assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL assert resp.json["status"] == Status.ERROR - assert self.job_info[9].status == Status.RUNNING, "Precondition invalid." - path = f"/jobs/{self.job_info[1].id}" - resp = self.app.get(path, headers=self.json_headers, params={"profile": "openeo"}) + job = self.job_info[9] + assert job.status == Status.RUNNING, "Precondition invalid." + path = f"/jobs/{job.id}" + resp = self.app.get(path, headers=self.json_headers, params={"schema": "openeo"}) assert resp.status_code == 200 assert resp.headers["Content-Type"] == "application/json; profile=openeo" assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL assert resp.json["status"] == Status.RUNNING - assert self.job_info[11].status == Status.ACCEPTED, "Precondition invalid." 
- path = f"/jobs/{self.job_info[1].id}" - resp = self.app.get(path, headers=self.json_headers, params={"profile": "openeo"}) + job = self.job_info[11] + assert job.status == Status.ACCEPTED, "Precondition invalid." + path = f"/jobs/{job.id}" + resp = self.app.get(path, headers=self.json_headers, params={"schema": "openeo"}) assert resp.status_code == 200 assert resp.headers["Content-Type"] == "application/json; profile=openeo" assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL diff --git a/weaver/typedefs.py b/weaver/typedefs.py index dac897dc1..76e146120 100644 --- a/weaver/typedefs.py +++ b/weaver/typedefs.py @@ -379,6 +379,9 @@ class CWL_SchemaName(Protocol): AnyResponseType = Union[PyramidResponse, WebobResponse, RequestsResponse, TestResponse] AnyResponseClass = Union[PyramidResponse, WebobResponse, HTTPException] AnyViewResponse = Union[AnyResponseClass, JSON] + AnyViewCallableContextRequest = Callable[[Any, AnyRequestType], AnyViewResponse] + AnyViewCallableRequestOnly = Callable[[AnyRequestType], AnyViewResponse] + AnyViewCallable = Union[AnyViewCallableContextRequest, AnyViewCallableRequestOnly] RequestMethod = Literal[ "HEAD", "GET", "POST", "PUT", "PATCH", "DELETE", "head", "get", "post", "put", "patch", "delete", diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index dfafa8c44..e13af3ed0 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -297,21 +297,30 @@ def trigger_job_execution(request): @sd.provider_job_service.get( tags=[sd.TAG_JOBS, sd.TAG_STATUS, sd.TAG_PROVIDERS], schema=sd.GetProviderJobEndpoint(), - accept=ContentType.APP_JSON, + accept=[ContentType.APP_JSON] + [ + f"{ContentType.APP_JSON}; profile={profile}" + for profile in JobStatusSchema.values() + ], renderer=OutputFormat.JSON, response_schemas=sd.get_prov_single_job_status_responses, ) @sd.process_job_service.get( tags=[sd.TAG_PROCESSES, sd.TAG_JOBS, sd.TAG_STATUS], 
schema=sd.GetProcessJobEndpoint(), - accept=ContentType.APP_JSON, + accept=[ContentType.APP_JSON] + [ + f"{ContentType.APP_JSON}; profile={profile}" + for profile in JobStatusSchema.values() + ], renderer=OutputFormat.JSON, response_schemas=sd.get_single_job_status_responses, ) @sd.job_service.get( tags=[sd.TAG_JOBS, sd.TAG_STATUS], schema=sd.GetJobEndpoint(), - accept=ContentType.APP_JSON, + accept=[ContentType.APP_JSON] + [ + f"{ContentType.APP_JSON}; profile={profile}" + for profile in JobStatusSchema.values() + ], renderer=OutputFormat.JSON, response_schemas=sd.get_single_job_status_responses, ) @@ -323,10 +332,10 @@ def get_job_status(request): """ job = get_job(request) job_body = job.json(request) - schema = get_job_status_schema(request) + schema, headers = get_job_status_schema(request) if schema == JobStatusSchema.OPENEO: job_body["status"] = map_status(job_body["status"], StatusCompliant.OPENEO) - return HTTPOk(json=job_body) + return HTTPOk(json=job_body, headers=headers) @sd.provider_job_service.patch( diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index d8d11070a..9c085bad3 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -40,7 +40,7 @@ parse_prefer_header_return, update_preference_applied_return_header ) -from weaver.formats import ContentEncoding, ContentType, get_format, repr_json +from weaver.formats import ContentEncoding, ContentType, get_format, repr_json, clean_media_type_format from weaver.owsexceptions import OWSNoApplicableCode, OWSNotFound from weaver.processes.constants import JobInputsOutputsSchema, JobStatusSchema from weaver.processes.convert import any2wps_literal_datatype, convert_output_params_schema, get_field @@ -317,26 +317,41 @@ def get_job_io_schema_query( def get_job_status_schema(request): - # type: (AnyRequestType) -> JobStatusSchemaType + # type: (AnyRequestType) -> Tuple[JobStatusSchemaType, HeadersType] """ Identifies if a :term:`Job` status response 
schema applies for the request. """ + def make_headers(resolved_schema): + content_accept = request.accept.header_value or ContentType.APP_JSON + content_type = clean_media_type_format(content_accept, strip_parameters=True) + content_profile = f"{content_type}; profile={resolved_schema}" + content_headers = {"Content-Type": content_profile} + if resolved_schema == JobStatusSchema.OGC: + content_headers["Content-Schema"] = sd.OGC_API_SCHEMA_JOB_STATUS_URL + elif resolved_schema == JobStatusSchema.OPENEO: + content_headers["Content-Schema"] = sd.OPENEO_API_SCHEMA_JOB_STATUS_URL + return content_headers + params = get_request_args(request) - schema = JobStatusSchema.get(params.get("schema")) + schema = JobStatusSchema.get(params.get("profile") or params.get("schema")) if schema: - return schema - ctype = get_header("Content-Type", request.headers) + headers = make_headers(schema) + return schema, headers + ctype = get_header("Accept", request.headers) if not ctype: - return JobStatusSchema.OGC + return JobStatusSchema.OGC, {} params = parse_kvp(ctype) profile = params.get("profile") if not profile: - return JobStatusSchema.OGC + schema = JobStatusSchema.OGC + headers = make_headers(schema) + return schema, headers schema = cast( "JobStatusSchemaType", - JobStatusSchema.get(profile, default=JobStatusSchema.OGC) + JobStatusSchema.get(profile[0], default=JobStatusSchema.OGC) ) - return schema + headers = make_headers(schema) + return schema, headers def make_result_link( diff --git a/weaver/wps_restapi/patches.py b/weaver/wps_restapi/patches.py index ed0df1a3c..8f18588d7 100644 --- a/weaver/wps_restapi/patches.py +++ b/weaver/wps_restapi/patches.py @@ -4,13 +4,15 @@ import contextlib from typing import TYPE_CHECKING -from cornice import Service as ServiceAutoGetHead +from cornice import Service as CorniceService from pyramid.config import Configurator as PyramidConfigurator from pyramid.predicates import RequestMethodPredicate from pyramid.util import as_sorted_tuple if 
TYPE_CHECKING: - from typing import Any, Tuple, Union + from typing import Any, Callable, Optional, Sequence, Tuple, Union + + from weaver.typedefs import AnyViewCallable, RequestMethod class Configurator(PyramidConfigurator): @@ -68,7 +70,33 @@ def append(self, __object): super(NoAutoHeadList, self).append(__object) -class ServiceOnlyExplicitGetHead(ServiceAutoGetHead): +class ServiceAutoAcceptDecorator(CorniceService): + """ + Extends the view :meth:`decorator` to allow multiple ``accept`` headers provided all at once. + + The base :class:`CorniceService` only allows a single ``accept`` header value, which forces repeating the entire + parameters over multiple separate decorator calls. + """ + + def decorator(self, method, accept=None, **kwargs): + # type: (RequestMethod, Optional[str, Sequence[str]], Any) -> Callable[[AnyViewCallable], AnyViewCallable] + if isinstance(accept, str) or accept is None: + return super().decorator(method, accept=accept, **kwargs) + + if not hasattr(accept, "__iter__") or not all(isinstance(header, str) for header in accept): + raise ValueError("Service decorator parameter 'accept' must be a single string or a sequence of strings.") + + def wrapper(view): + # type: (AnyViewCallable) -> AnyViewCallable + for header in accept: + wrap_view = CorniceService.decorator(self, method, accept=header, **kwargs) + wrap_view(view) + return view + + return wrapper + + +class ServiceOnlyExplicitGetHead(CorniceService): """ Service that disallow the auto-insertion of HTTP HEAD method view when HTTP GET view is defined. @@ -99,6 +127,12 @@ def add_view(self, method, view, **kwargs): super(ServiceOnlyExplicitGetHead, self).add_view(method, view, **kwargs) +class WeaverService(ServiceAutoAcceptDecorator, ServiceOnlyExplicitGetHead): + """ + Service that combines all respective capabilities required by :mod:`weaver`. 
+ """ + + class RequestMethodPredicateNoGetHead(RequestMethodPredicate): # pylint: disable=W0231,super-init-not-called # whole point of this init is to bypass original behavior diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index a187c6fbd..b19fbf356 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -139,7 +139,7 @@ XMLObject ) from weaver.wps_restapi.constants import ConformanceCategory -from weaver.wps_restapi.patches import ServiceOnlyExplicitGetHead as Service # warning: don't use 'cornice.Service' +from weaver.wps_restapi.patches import WeaverService as Service # warning: don't use 'cornice.Service' if TYPE_CHECKING: from typing import Any, Dict, Type, Union @@ -222,6 +222,8 @@ OGC_API_BBOX_FORMAT = "ogc-bbox" # equal CRS:84 and EPSG:4326, equivalent to WGS84 with swapped lat-lon order OGC_API_BBOX_EPSG = "EPSG:4326" +OGC_API_SCHEMA_JOB_STATUS_URL = f"{OGC_API_PROC_PART1_SCHEMAS}/statusInfo.yaml" + OPENEO_API_SCHEMA_URL = "https://openeo.org/documentation/1.0/developers/api/openapi.yaml" OPENEO_API_SCHEMA_JOB_STATUS_URL = f"{OPENEO_API_SCHEMA_URL}#/components/schemas/batch_job" @@ -3265,16 +3267,19 @@ class ProcessVisibilityPutEndpoint(LocalProcessPath): body = VisibilitySchema() +class JobStatusQueryProfileSchema(ExtendedSchemaNode): + summary = "Job status schema representation." + description = "Selects the schema employed for representation of returned job status response." 
+ schema_type = String + title = "JobStatusQuerySchema" + example = JobStatusSchema.OGC + default = JobStatusSchema.OGC + validator = OneOfCaseInsensitive(JobStatusSchema.values()) + + class GetJobQuery(ExtendedMappingSchema): - schema = ExtendedSchemaNode( - String(), - title="JobStatusQuerySchema", - example=JobStatusSchema.OGC, - default=JobStatusSchema.OGC, - validator=OneOfCaseInsensitive(JobStatusSchema.values()), - summary="Job status schema representation.", - description="Selects the schema employed for representation of returned job status response.", - ) + schema = JobStatusQueryProfileSchema(missing=drop) + profile = JobStatusQueryProfileSchema(missing=drop) class GetProviderJobEndpoint(ProviderProcessPath, JobPath): @@ -3735,7 +3740,7 @@ def deserialize(self, cstruct): class JobStatusInfo(ExtendedMappingSchema): - _schema = f"{OGC_API_PROC_PART1_SCHEMAS}/statusInfo.yaml" + _schema = OGC_API_SCHEMA_JOB_STATUS_URL jobID = JobID() processID = ProcessIdentifierTag(missing=None, default=None, description="Process identifier corresponding to the job execution.") From 1c8a2e037bac2fd357cb12d16d8e647cc0b16545 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 22 Oct 2024 19:45:51 -0400 Subject: [PATCH 20/33] add missing part 4 requirements --- weaver/processes/execution.py | 4 +- weaver/processes/utils.py | 69 ++++++++++++++++++++++++++++-- weaver/wps_restapi/api.py | 21 ++++++++- weaver/wps_restapi/jobs/jobs.py | 2 +- weaver/wps_restapi/utils.py | 75 +-------------------------------- 5 files changed, 89 insertions(+), 82 deletions(-) diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index f5ab46ccf..cb3d3c94c 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -1043,7 +1043,7 @@ def validate_job_json(request): """ if ContentType.APP_JSON not in request.content_type: raise HTTPUnsupportedMediaType(json={ - "type": 
"http://www.opengis.net/def/exceptions/ogcapi-processes-2/1.0/unsupported-media-type", + "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-4/1.0/unsupported-media-type", "title": "Unsupported Media-Type", "detail": f"Request 'Content-Type' header other than '{ContentType.APP_JSON}' is not supported.", "code": "InvalidHeaderValue", @@ -1054,7 +1054,7 @@ def validate_job_json(request): json_body = request.json_body except Exception as ex: raise HTTPBadRequest(json={ - "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-2/1.0/unsupported-media-type", + "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-4/1.0/unsupported-media-type", "title": "Bad Request", "detail": f"Invalid JSON body cannot be decoded for job submission. [{ex}]", }) diff --git a/weaver/processes/utils.py b/weaver/processes/utils.py index 610881d95..13f7acaa8 100644 --- a/weaver/processes/utils.py +++ b/weaver/processes/utils.py @@ -17,9 +17,11 @@ HTTPCreated, HTTPException, HTTPForbidden, + HTTPInternalServerError, HTTPNotFound, HTTPOk, - HTTPUnprocessableEntity + HTTPUnprocessableEntity, + HTTPUnsupportedMediaType ) from pyramid.settings import asbool @@ -70,7 +72,7 @@ from weaver.wps.utils import get_wps_client from weaver.wps_restapi import swagger_definitions as sd from weaver.wps_restapi.processes.utils import resolve_process_tag -from weaver.wps_restapi.utils import get_wps_restapi_base_url, parse_content +from weaver.wps_restapi.utils import get_wps_restapi_base_url LOGGER = logging.getLogger(__name__) if TYPE_CHECKING: @@ -326,6 +328,65 @@ def resolve_cwl_graph(package): return package +def parse_process_deploy_content( + request=None, # type: Optional[AnyRequestType] + content=None, # type: Optional[Union[JSON, str]] + content_schema=None, # type: Optional[colander.SchemaNode] + content_type=sd.RequestContentTypeHeader.default, # type: Optional[ContentType] + content_type_schema=sd.RequestContentTypeHeader, # type: Optional[colander.SchemaNode] +): # 
type: (...) -> Union[JSON, CWL] + """ + Load the request content with validation of expected content type and their schema. + """ + if request is None and content is None: # pragma: no cover # safeguard for early detect invalid implementation + raise HTTPInternalServerError(json={ + "title": "Internal Server Error", + "type": "InternalServerError", + "detail": "Cannot parse undefined contents.", + "status": HTTPInternalServerError.code, + "cause": "Request content and content argument are undefined.", + }) + try: + if request is not None: + content = request.text + content_type = request.content_type + if content_type is not None and content_type_schema is not None: + content_type = content_type_schema().deserialize(content_type) + if isinstance(content, str): + content = yaml.safe_load(content) + if not isinstance(content, dict): + raise TypeError("Not a valid JSON body for process deployment.") + except colander.Invalid as exc: + raise HTTPUnsupportedMediaType(json={ + "title": "Unsupported Media Type", + "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-2/1.0/unsupported-media-type", + "detail": str(exc), + "status": HTTPUnsupportedMediaType.code, + "cause": {"Content-Type": None if content_type is None else str(content_type)}, + }) + except Exception as exc: + raise HTTPBadRequest(json={ + "title": "Bad Request", + "type": "BadRequest", + "detail": "Unable to parse contents.", + "status": HTTPBadRequest.code, + "cause": str(exc), + }) + try: + if content_schema is not None: + content = content_schema().deserialize(content) + except colander.Invalid as exc: + raise HTTPUnprocessableEntity(json={ + "type": "InvalidParameterValue", + "title": "Failed schema validation.", + "status": HTTPUnprocessableEntity.code, + "error": colander.Invalid.__name__, + "cause": exc.msg, + "value": repr_json(exc.value, force_string=False), + }) + return content + + def deploy_process_from_payload(payload, container, overwrite=False): # pylint: disable=R1260,too-complex 
# type: (Union[JSON, str], Union[AnySettingsContainer, AnyRequestType], Union[bool, Process]) -> HTTPException """ @@ -351,7 +412,7 @@ def deploy_process_from_payload(payload, container, overwrite=False): # pylint: c_type = ContentType.get(get_header("Content-Type", headers), default=ContentType.APP_OGC_PKG_JSON) # use deepcopy of to remove any circular dependencies before writing to mongodb or any updates to the payload - payload = parse_content( + payload = parse_process_deploy_content( request=None, content=payload, content_type=c_type, @@ -836,7 +897,7 @@ def update_process_metadata(request): Desired new version can be eiter specified explicitly in request payload, or will be guessed accordingly to detected changes to be applied. """ - data = parse_content(request, content_schema=sd.PatchProcessBodySchema) + data = parse_process_deploy_content(request, content_schema=sd.PatchProcessBodySchema) old_process = get_process(request=request) new_process = copy.deepcopy(old_process) update_level = _apply_process_metadata(new_process, data) diff --git a/weaver/wps_restapi/api.py b/weaver/wps_restapi/api.py index 59de68a20..da11e1198 100644 --- a/weaver/wps_restapi/api.py +++ b/weaver/wps_restapi/api.py @@ -529,10 +529,27 @@ def get_conformance(category, settings): f"{ogcapi_proc_part4}/per/job-management/update-content-schema", # FIXME: support part 3: Nested Workflow Execution request (https://github.com/crim-ca/weaver/issues/412) # f"{ogcapi_proc_part4}/rec/job-management/create-body-ogcapi-processes", + # f"{ogcapi_proc_part4}/rec/job-management/update-body-ogcapi-processes", # FIXME: support openEO processes (https://github.com/crim-ca/weaver/issues/564) # f"{ogcapi_proc_part4}/rec/job-management/create-body-openeo", - f"{ogcapi_proc_part4}/req/job-management/create/post-op", - f"{ogcapi_proc_part4}/req/job-management/update/response-locked", + # f"{ogcapi_proc_part4}/rec/job-management/update-body-openeo", + 
f"{ogcapi_proc_part4}/req/job-management/create-post-op", + f"{ogcapi_proc_part4}/req/job-management/create-content-type", + f"{ogcapi_proc_part4}/req/job-management/create-response-body", + f"{ogcapi_proc_part4}/req/job-management/create-response-jobid", + f"{ogcapi_proc_part4}/req/job-management/create-response-success", + # f"{ogcapi_proc_part4}/req/job-management/create-unsupported-schema", + f"{ogcapi_proc_part4}/req/job-management/create-unsupported-media-type", + f"{ogcapi_proc_part4}/req/job-management/definition-get-op", + f"{ogcapi_proc_part4}/req/job-management/definition-response-body", + f"{ogcapi_proc_part4}/req/job-management/definition-response-success", + f"{ogcapi_proc_part4}/req/job-management/start-post-op", + f"{ogcapi_proc_part4}/req/job-management/start-response", + f"{ogcapi_proc_part4}/req/job-management/update-body", + f"{ogcapi_proc_part4}/req/job-management/update-content-type", + f"{ogcapi_proc_part4}/req/job-management/update-patch-op", + f"{ogcapi_proc_part4}/req/job-management/update-response", + f"{ogcapi_proc_part4}/req/job-management/update-response-locked", # FIXME: employ 'weaver.wps_restapi.quotation.utils.check_quotation_supported' to add below conditionally # FIXME: https://github.com/crim-ca/weaver/issues/156 (billing/quotation) # https://github.com/opengeospatial/ogcapi-processes/tree/master/extensions/billing diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index e13af3ed0..76847b581 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -251,7 +251,7 @@ def create_job(request): if not proc_id: raise HTTPUnsupportedMediaType(json={ "title": "Unsupported Media Type", - "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-2/1.0/unsupported-media-type", + "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-4/1.0/unsupported-media-type", "detail": "Process URL or identifier reference missing or invalid.", "status": 
HTTPUnsupportedMediaType.code, "cause": {"headers": {"Content-Type": ctype}}, diff --git a/weaver/wps_restapi/utils.py b/weaver/wps_restapi/utils.py index 8cc695951..789616dce 100644 --- a/weaver/wps_restapi/utils.py +++ b/weaver/wps_restapi/utils.py @@ -6,34 +6,21 @@ from typing import TYPE_CHECKING import colander -import yaml from box import Box from pyramid.events import BeforeRender, subscriber -from pyramid.httpexceptions import ( - HTTPBadRequest, - HTTPInternalServerError, - HTTPSuccessful, - HTTPUnprocessableEntity, - HTTPUnsupportedMediaType, - status_map -) +from pyramid.httpexceptions import HTTPBadRequest, HTTPSuccessful, status_map from weaver import __meta__ -from weaver.formats import repr_json from weaver.utils import get_header, get_settings, get_weaver_url from weaver.wps_restapi import swagger_definitions as sd if TYPE_CHECKING: - from typing import Any, Dict, Optional, Union + from typing import Any, Dict, Optional - from weaver.formats import ContentType from weaver.typedefs import ( AnyCallableWrapped, - AnyRequestType, AnySettingsContainer, - CWL, HeadersType, - JSON, Params, Return, SettingsType @@ -187,64 +174,6 @@ def wrapped(*args, **kwargs): return decorator -def parse_content(request=None, # type: Optional[AnyRequestType] - content=None, # type: Optional[Union[JSON, str]] - content_schema=None, # type: Optional[colander.SchemaNode] - content_type=sd.RequestContentTypeHeader.default, # type: Optional[ContentType] - content_type_schema=sd.RequestContentTypeHeader, # type: Optional[colander.SchemaNode] - ): # type: (...) -> Union[JSON, CWL] - """ - Load the request content with validation of expected content type and their schema. 
- """ - if request is None and content is None: # pragma: no cover # safeguard for early detect invalid implementation - raise HTTPInternalServerError(json={ - "title": "Internal Server Error", - "type": "InternalServerError", - "detail": "Cannot parse undefined contents.", - "status": HTTPInternalServerError.code, - "cause": "Request content and content argument are undefined.", - }) - try: - if request is not None: - content = request.text - content_type = request.content_type - if content_type is not None and content_type_schema is not None: - content_type = content_type_schema().deserialize(content_type) - if isinstance(content, str): - content = yaml.safe_load(content) - if not isinstance(content, dict): - raise TypeError("Not a valid JSON body for process deployment.") - except colander.Invalid as exc: - raise HTTPUnsupportedMediaType(json={ - "title": "Unsupported Media Type", - "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-2/1.0/unsupported-media-type", - "detail": str(exc), - "status": HTTPUnsupportedMediaType.code, - "cause": {"Content-Type": None if content_type is None else str(content_type)}, - }) - except Exception as exc: - raise HTTPBadRequest(json={ - "title": "Bad Request", - "type": "BadRequest", - "detail": "Unable to parse contents.", - "status": HTTPBadRequest.code, - "cause": str(exc), - }) - try: - if content_schema is not None: - content = content_schema().deserialize(content) - except colander.Invalid as exc: - raise HTTPUnprocessableEntity(json={ - "type": "InvalidParameterValue", - "title": "Failed schema validation.", - "status": HTTPUnprocessableEntity.code, - "error": colander.Invalid.__name__, - "cause": exc.msg, - "value": repr_json(exc.value, force_string=False), - }) - return content - - @subscriber(BeforeRender) def add_renderer_context(event): # type: (BeforeRender) -> None From c835590587fcd260387b29e938bc9eef380a742c Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 22 Oct 2024 20:32:57 
-0400 Subject: [PATCH 21/33] fix auto execution mode with accept header to resolve as sync execution for non-JSON job status --- tests/wps_restapi/test_jobs.py | 21 +++++++++++---------- weaver/processes/execution.py | 2 +- weaver/wps_restapi/jobs/utils.py | 6 +++++- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index 1fcb29a31..b2f89c201 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -40,6 +40,7 @@ ) from weaver.formats import ContentType from weaver.notify import decrypt_email +from weaver.processes.constants import JobStatusSchema from weaver.processes.wps_testing import WpsTestProcess from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory from weaver.utils import get_path_kvp, now @@ -1936,11 +1937,11 @@ def test_job_status_alt_openeo_accept_response(self): """ job = self.job_info[0] assert job.status == Status.SUCCEEDED, "Precondition invalid." - headers = {"Accept": "application/json; profile=openeo"} + headers = {"Accept": f"{ContentType.APP_JSON}; profile={JobStatusSchema.OPENEO}"} path = f"/jobs/{job.id}" resp = self.app.get(path, headers=headers) assert resp.status_code == 200 - assert resp.headers["Content-Type"] == "application/json; profile=openeo" + assert resp.headers["Content-Type"] == f"{ContentType.APP_JSON}; profile={JobStatusSchema.OPENEO}" assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL assert resp.json["status"] == Status.FINISHED @@ -1949,7 +1950,7 @@ def test_job_status_alt_openeo_accept_response(self): path = f"/jobs/{job.id}" resp = self.app.get(path, headers=headers) assert resp.status_code == 200 - assert resp.headers["Content-Type"] == "application/json; profile=openeo" + assert resp.headers["Content-Type"] == f"{ContentType.APP_JSON}; profile={JobStatusSchema.OPENEO}" assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL assert resp.json["status"] == 
Status.ERROR @@ -1958,7 +1959,7 @@ def test_job_status_alt_openeo_accept_response(self): path = f"/jobs/{job.id}" resp = self.app.get(path, headers=headers) assert resp.status_code == 200 - assert resp.headers["Content-Type"] == "application/json; profile=openeo" + assert resp.headers["Content-Type"] == f"{ContentType.APP_JSON}; profile={JobStatusSchema.OPENEO}" assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL assert resp.json["status"] == Status.RUNNING @@ -1967,7 +1968,7 @@ def test_job_status_alt_openeo_accept_response(self): path = f"/jobs/{job.id}" resp = self.app.get(path, headers=headers) assert resp.status_code == 200 - assert resp.headers["Content-Type"] == "application/json; profile=openeo" + assert resp.headers["Content-Type"] == f"{ContentType.APP_JSON}; profile={JobStatusSchema.OPENEO}" assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL assert resp.json["status"] == Status.QUEUED @@ -1980,9 +1981,9 @@ def test_job_status_alt_openeo_profile_response(self): job = self.job_info[0] assert job.status == Status.SUCCEEDED, "Precondition invalid." 
path = f"/jobs/{job.id}" - resp = self.app.get(path, headers=self.json_headers, params={"schema": "openeo"}) + resp = self.app.get(path, headers=self.json_headers, params={"schema": JobStatusSchema.OPENEO}) assert resp.status_code == 200 - assert resp.headers["Content-Type"] == "application/json; profile=openeo" + assert resp.headers["Content-Type"] == f"{ContentType.APP_JSON}; profile={JobStatusSchema.OPENEO}" assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL assert resp.json["status"] == Status.FINISHED @@ -1991,7 +1992,7 @@ def test_job_status_alt_openeo_profile_response(self): path = f"/jobs/{job.id}" resp = self.app.get(path, headers=self.json_headers, params={"schema": "openeo"}) assert resp.status_code == 200 - assert resp.headers["Content-Type"] == "application/json; profile=openeo" + assert resp.headers["Content-Type"] == f"{ContentType.APP_JSON}; profile={JobStatusSchema.OPENEO}" assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL assert resp.json["status"] == Status.ERROR @@ -2000,7 +2001,7 @@ def test_job_status_alt_openeo_profile_response(self): path = f"/jobs/{job.id}" resp = self.app.get(path, headers=self.json_headers, params={"schema": "openeo"}) assert resp.status_code == 200 - assert resp.headers["Content-Type"] == "application/json; profile=openeo" + assert resp.headers["Content-Type"] == f"{ContentType.APP_JSON}; profile={JobStatusSchema.OPENEO}" assert resp.headers["Content-Schema"] == sd.OPENEO_API_SCHEMA_JOB_STATUS_URL assert resp.json["status"] == Status.RUNNING @@ -2009,7 +2010,7 @@ def test_job_status_alt_openeo_profile_response(self): path = f"/jobs/{job.id}" resp = self.app.get(path, headers=self.json_headers, params={"schema": "openeo"}) assert resp.status_code == 200 - assert resp.headers["Content-Type"] == "application/json; profile=openeo" + assert resp.headers["Content-Type"] == f"{ContentType.APP_JSON}; profile={JobStatusSchema.OPENEO}" assert resp.headers["Content-Schema"] == 
sd.OPENEO_API_SCHEMA_JOB_STATUS_URL assert resp.json["status"] == Status.QUEUED diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index cb3d3c94c..f2a3715fd 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -1095,7 +1095,7 @@ def validate_job_accept_header(headers, execution_mode): if ContentType.APP_JSON in accept: return ContentType.APP_JSON # anything always allowed in sync, since results returned directly - if execution_mode == ExecuteMode.SYNC: + if execution_mode in [ExecuteMode.SYNC, ExecuteMode.AUTO]: return accept if ContentType.ANY in accept: return diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 9c085bad3..ba1f378cc 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -321,8 +321,8 @@ def get_job_status_schema(request): """ Identifies if a :term:`Job` status response schema applies for the request. """ + def make_headers(resolved_schema): - content_accept = request.accept.header_value or ContentType.APP_JSON content_type = clean_media_type_format(content_accept, strip_parameters=True) content_profile = f"{content_type}; profile={resolved_schema}" content_headers = {"Content-Type": content_profile} @@ -332,6 +332,10 @@ def make_headers(resolved_schema): content_headers["Content-Schema"] = sd.OPENEO_API_SCHEMA_JOB_STATUS_URL return content_headers + content_accept = request.accept.header_value or ContentType.APP_JSON + if content_accept == ContentType.ANY: + content_accept = ContentType.APP_JSON + params = get_request_args(request) schema = JobStatusSchema.get(params.get("profile") or params.get("schema")) if schema: From fafe3b5d7025df77f206891906c81f1ad71b9f34 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 22 Oct 2024 21:24:59 -0400 Subject: [PATCH 22/33] fix accept header resolution by cornice-swagger when omitted for endpoint --- tests/wps_restapi/test_jobs.py | 2 +- weaver/wps_restapi/jobs/jobs.py | 1 
- weaver/wps_restapi/patches.py | 7 ++++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index b2f89c201..e21114045 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -1802,7 +1802,7 @@ def test_job_inputs_response(self): "Prefer": f"return={ExecuteReturnPreference.MINIMAL}", "X-WPS-Output-Context": "test/context", } - assert resp.json["mode"] == ExecuteMode.ASYNC + assert resp.json["mode"] == ExecuteMode.AUTO assert resp.json["response"] == ExecuteResponse.DOCUMENT assert "subscribers" not in resp.json, "Subscribers must not be exposed due to potentially sensible data" diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index 76847b581..dbe6dccca 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -1,4 +1,3 @@ -from moto.batch.utils import JobStatus from typing import TYPE_CHECKING from box import Box diff --git a/weaver/wps_restapi/patches.py b/weaver/wps_restapi/patches.py index 8f18588d7..372687f40 100644 --- a/weaver/wps_restapi/patches.py +++ b/weaver/wps_restapi/patches.py @@ -80,10 +80,11 @@ class ServiceAutoAcceptDecorator(CorniceService): def decorator(self, method, accept=None, **kwargs): # type: (RequestMethod, Optional[str, Sequence[str]], Any) -> Callable[[AnyViewCallable], AnyViewCallable] - if isinstance(accept, str) or accept is None: + if not accept: + return super().decorator(method, **kwargs) # don't inject 'accept=None', causes cornice-swagger error + if isinstance(accept, str): return super().decorator(method, accept=accept, **kwargs) - - if not hasattr(accept, "__iter__") or not all(isinstance(header, str) for header in accept): + if not hasattr(accept, "__iter__") or not all(isinstance(header, str) for header in accept): # type: ignore raise ValueError("Service decorator parameter 'accept' must be a single string or a sequence of strings.") def wrapper(view): From 
f1d69c2d1173a6668d990a17e31f5fda5958cbb7 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 22 Oct 2024 21:54:28 -0400 Subject: [PATCH 23/33] fix imports linting --- weaver/wps_restapi/jobs/jobs.py | 4 ++-- weaver/wps_restapi/jobs/utils.py | 2 +- weaver/wps_restapi/utils.py | 9 +-------- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index dbe6dccca..ec50c11fe 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -42,11 +42,11 @@ from weaver.wps_restapi.jobs.utils import ( dismiss_job_task, get_job, + get_job_io_schema_query, get_job_list_links, get_job_results_response, - get_results, - get_job_io_schema_query, get_job_status_schema, + get_results, raise_job_bad_status_locked, raise_job_bad_status_success, raise_job_dismissed, diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index ba1f378cc..6952d9b6f 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -40,7 +40,7 @@ parse_prefer_header_return, update_preference_applied_return_header ) -from weaver.formats import ContentEncoding, ContentType, get_format, repr_json, clean_media_type_format +from weaver.formats import ContentEncoding, ContentType, clean_media_type_format, get_format, repr_json from weaver.owsexceptions import OWSNoApplicableCode, OWSNotFound from weaver.processes.constants import JobInputsOutputsSchema, JobStatusSchema from weaver.processes.convert import any2wps_literal_datatype, convert_output_params_schema, get_field diff --git a/weaver/wps_restapi/utils.py b/weaver/wps_restapi/utils.py index 789616dce..883ea65b7 100644 --- a/weaver/wps_restapi/utils.py +++ b/weaver/wps_restapi/utils.py @@ -17,14 +17,7 @@ if TYPE_CHECKING: from typing import Any, Dict, Optional - from weaver.typedefs import ( - AnyCallableWrapped, - AnySettingsContainer, - HeadersType, - Params, - Return, - SettingsType - ) + from 
weaver.typedefs import AnyCallableWrapped, AnySettingsContainer, HeadersType, Params, Return, SettingsType LOGGER = logging.getLogger(__name__) From 5ec1c33022ae200a19c5d434703a39b95ea349a3 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 23 Oct 2024 15:42:09 -0400 Subject: [PATCH 24/33] add test coverage for job attributes --- tests/test_datatype.py | 183 ++++++++++++++++++++++++++++++++++++++++- weaver/datatype.py | 14 ++-- 2 files changed, 190 insertions(+), 7 deletions(-) diff --git a/tests/test_datatype.py b/tests/test_datatype.py index 051b94f59..1ddbec211 100644 --- a/tests/test_datatype.py +++ b/tests/test_datatype.py @@ -1,11 +1,18 @@ import uuid from copy import deepcopy +from datetime import datetime, timedelta import pytest from tests import resources -from weaver.datatype import Authentication, AuthenticationTypes, DockerAuthentication, Process -from weaver.execute import ExecuteControlOption +from visibility import Visibility +from weaver.datatype import Authentication, AuthenticationTypes, DockerAuthentication, Job, Process, Service +from weaver.execute import ExecuteControlOption, ExecuteMode, ExecuteResponse, ExecuteReturnPreference +from weaver.formats import ContentType +from weaver.status import Status +from weaver.utils import localize_datetime, now + +TEST_UUID = uuid.uuid4() def test_package_encode_decode(): @@ -206,3 +213,175 @@ def test_process_io_schema_ignore_uri(): ]) def test_process_split_version(process_id, result): assert Process.split_version(process_id) == result + + +@pytest.mark.parametrize( + ["attribute", "value", "result"], + [ + ("user_id", TEST_UUID, TEST_UUID), + ("user_id", str(TEST_UUID), str(TEST_UUID)), + ("user_id", "not-a-uuid", "not-a-uuid"), + ("user_id", 1234, 1234), + ("user_id", 3.14, TypeError), + ("task_id", TEST_UUID, TEST_UUID), + ("task_id", str(TEST_UUID), TEST_UUID), + ("task_id", "not-a-uuid", "not-a-uuid"), + ("task_id", 1234, TypeError), + ("wps_id", TEST_UUID, TEST_UUID), + 
("wps_id", str(TEST_UUID), TEST_UUID), + ("wps_id", 1234, TypeError), + ("wps_url", "https://example.com/wps", "https://example.com/wps"), + ("wps_url", 1234, TypeError), + ("execution_mode", ExecuteMode.ASYNC, ExecuteMode.ASYNC), + ("execution_mode", None, ValueError), # "auto" required if unspecified + ("execution_mode", "abc", ValueError), + ("execution_mode", 12345, ValueError), + ("execution_response", ExecuteResponse.RAW, ExecuteResponse.RAW), + ("execution_response", None, ExecuteResponse.DOCUMENT), # weaver's default + ("execution_response", "abc", ValueError), + ("execution_response", 12345, ValueError), + ("execution_return", ExecuteReturnPreference.REPRESENTATION, ExecuteReturnPreference.REPRESENTATION), + ("execution_return", None, ExecuteReturnPreference.MINIMAL), # weaver's default + ("execution_return", "abc", ValueError), + ("execution_return", 12345, ValueError), + ("execution_wait", 1234, 1234), + ("execution_wait", None, None), + ("execution_wait", "abc", ValueError), + ("is_local", True, True), + ("is_local", 1, TypeError), + ("is_local", None, TypeError), + ("is_workflow", True, True), + ("is_workflow", 1, TypeError), + ("is_workflow", None, TypeError), + ("created", "2024-01-02", localize_datetime(datetime(year=2024, month=1, day=2))), + ("created", datetime(year=2024, month=1, day=2), localize_datetime(datetime(year=2024, month=1, day=2))), + ("created", "abc", ValueError), + ("created", 12345, TypeError), + ("updated", "2024-01-02", localize_datetime(datetime(year=2024, month=1, day=2))), + ("updated", datetime(year=2024, month=1, day=2), localize_datetime(datetime(year=2024, month=1, day=2))), + ("updated", "abc", ValueError), + ("updated", 12345, TypeError), + ("service", Service(name="test", url="https://example.com/wps"), "test"), + ("service", "test", "test"), + ("service", 1234, TypeError), + ("service", None, TypeError), + ("process", Process(id="test", package={}), "test"), + ("process", "test", "test"), + ("process", 1234, 
TypeError), + ("process", None, TypeError), + ("progress", "test", TypeError), + ("process", None, TypeError), + ("progress", 123, ValueError), + ("progress", -20, ValueError), + ("progress", 50, 50), + ("progress", 2.5, 2.5), + ("statistics", {}, {}), + ("statistics", None, TypeError), + ("statistics", 1234, TypeError), + ("exceptions", [], []), + ("exceptions", {}, TypeError), + ("exceptions", "error", TypeError), + ("exceptions", None, TypeError), + ("exceptions", 1234, TypeError), + ("results", [], []), + ("results", None, TypeError), + ("results", 1234, TypeError), + ("logs", [], []), + ("logs", "info", TypeError), + ("logs", None, TypeError), + ("logs", 1234, TypeError), + ("tags", [], []), + ("tags", "test", TypeError), + ("tags", None, TypeError), + ("tags", 1234, TypeError), + ("status", Status.SUCCEEDED, Status.SUCCEEDED), + ("status", 12345678, ValueError), + ("status", "random", ValueError), + ("status_message", None, "no message"), + ("status_message", "test", "test"), + ("status_message", 123456, TypeError), + ("status_location", f"https://example.com/jobs/{TEST_UUID}", f"https://example.com/jobs/{TEST_UUID}"), + ("status_location", None, TypeError), + ("status_location", 123456, TypeError), + ("accept_type", None, TypeError), + ("accept_type", 123456, TypeError), + ("accept_type", ContentType.APP_JSON, ContentType.APP_JSON), + ("accept_language", None, TypeError), + ("accept_language", 123456, TypeError), + ("accept_language", "en", "en"), + ("access", Visibility.PRIVATE, Visibility.PRIVATE), + ("access", 12345678, ValueError), + ("access", "random", ValueError), + ("access", None, ValueError), + ("context", "test", "test"), + ("context", None, None), + ("context", 1234, TypeError), + ] +) +def test_job_attribute_setter(attribute, value, result): + job = Job(task_id="test") + if isinstance(result, type) and issubclass(result, Exception): + with pytest.raises(result): + setattr(job, attribute, value) + else: + setattr(job, attribute, value) + assert 
job[attribute] == result + + +@pytest.mark.parametrize( + ["value", "result"], + [ + (TEST_UUID, TEST_UUID), + (str(TEST_UUID), TEST_UUID), + ("not-a-uuid", ValueError), + (12345, TypeError), + + ] +) +def test_job_id(value, result): + if isinstance(result, type) and issubclass(result, Exception): + with pytest.raises(result): + Job(task_id="test", id=value) + else: + job = Job(task_id="test", id=value) + assert job.id == result + + +def test_job_updated_auto(): + min_dt = now() + job = Job(task_id="test") + update_dt = job.updated + assert isinstance(update_dt, datetime) + assert update_dt > min_dt + assert update_dt == job.updated, "Updated time auto generated should have been set to avoid always regenerating it." + + +def test_job_updated_status(): + created = now() + started = now() + timedelta(seconds=1) + finished = now() + timedelta(seconds=2) + # date-times cannot be set in advance in job, + # otherwise 'updated' detects and returns them automatically + job = Job(task_id="test") + job.created = created + job.status = Status.ACCEPTED + assert job.updated == created + job["updated"] = None # reset to test auto resolve + job.started = started + job.status = Status.STARTED + assert job.updated == started + job["updated"] = None # reset to test auto resolve + job.finished = finished + job.status = Status.SUCCEEDED + assert job.updated == finished + + +def test_job_execution_wait_ignored_async(): + job = Job(task_id="test", execution_wait=1234, execution_mode=ExecuteMode.ASYNC) + assert job.execution_mode == ExecuteMode.ASYNC + assert job.execution_wait is None, "Because of async explicitly set, wait time does not apply" + + +def test_job_display(): + job = Job(task_id=TEST_UUID, id=TEST_UUID) + assert str(job) == f"Job <{TEST_UUID}>" diff --git a/weaver/datatype.py b/weaver/datatype.py index 6f20db7f6..22af34259 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -319,6 +319,8 @@ def __get__(self, instance, *_): if instance is None: # allow access to the 
descriptor as class attribute 'getattr(type(instance), property-name)' return self # noqa + if self.fget != self.__get__: # ensure that any 'fget' specified at property creation is employed + return self.fget(instance) dt = instance.get(self.name, None) if not dt: if self.default_now: @@ -330,7 +332,9 @@ def __get__(self, instance, *_): def __set__(self, instance, value): # type: (Any, Union[datetime, str]) -> None - if isinstance(str, datetime): + if self.fset != self.__set__: # ensure that any 'fset' specified at property creation is employed + return self.fset(instance, value) + if isinstance(value, str): value = dt_parse(value) if not isinstance(value, datetime): name = fully_qualified_name(instance) @@ -966,14 +970,14 @@ def _set_outputs(self, outputs): @property def user_id(self): - # type: () -> Optional[str] + # type: () -> Optional[Union[AnyUUID, int]] return self.get("user_id", None) @user_id.setter def user_id(self, user_id): - # type: (Optional[str]) -> None - if not isinstance(user_id, int) or user_id is None: - raise TypeError(f"Type 'int' is required for '{self.__name__}.user_id'") + # type: (Optional[Union[AnyUUID, int]]) -> None + if not isinstance(user_id, (int, str, uuid.UUID)) or user_id is None: + raise TypeError(f"Type 'int', 'str' or a UUID is required for '{self.__name__}.user_id'") self["user_id"] = user_id @property From aaf978a225e026b250722d117bc261deee4afee4 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 23 Oct 2024 20:08:23 -0400 Subject: [PATCH 25/33] add Job title attribute to align with openEO --- CHANGES.rst | 4 +++ tests/test_datatype.py | 4 +++ tests/wps_restapi/test_jobs.py | 39 +++++++++++++++++++++++ weaver/datatype.py | 14 ++++++++ weaver/processes/execution.py | 2 +- weaver/wps_restapi/colander_extras.py | 4 +-- weaver/wps_restapi/jobs/utils.py | 1 + weaver/wps_restapi/swagger_definitions.py | 20 +++++++++++- 8 files changed, 84 insertions(+), 4 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 
8962811e5..eeae25142 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -27,6 +27,10 @@ Changes: - Align ``GET /jobs/{jobID}/outputs`` with requirements of *OGC API - Processes - Part 4: Job Management* endpoints such that omitting the ``schema`` query parameter will automatically apply the `OGC` mapping representation by default. Previous behavior was to return whichever representation that was used by the internal `Process` interface. +- Align `Job` status and update operations with some of the `openEO` behaviors, such as supporting a `Job` ``title`` + and allowing ``status`` to return `openEO` values when using ``profile=openeo`` in the ``Content-Type`` or using + the query parameter ``profile``/``schema``. The ``Content-Schema`` will also reflect the resolved representation + in the `Job` status response. - Add support of ``response: raw`` execution request body parameter as alternative to ``response: document``, which allows directly returning the result contents or ``Link`` headers rather then embedding them in a `JSON` response (fixes `#376 `_). 
diff --git a/tests/test_datatype.py b/tests/test_datatype.py index 1ddbec211..667984d84 100644 --- a/tests/test_datatype.py +++ b/tests/test_datatype.py @@ -294,6 +294,10 @@ def test_process_split_version(process_id, result): ("tags", "test", TypeError), ("tags", None, TypeError), ("tags", 1234, TypeError), + ("title", "test", "test"), + ("title", None, None), + ("title", TypeError, TypeError), + ("title", 1234, TypeError), ("status", Status.SUCCEEDED, Status.SUCCEEDED), ("status", 12345678, ValueError), ("status", "random", ValueError), diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index e21114045..fa23033ca 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -1906,6 +1906,45 @@ def test_job_update_response(self): } } + @pytest.mark.oap_part4 + @pytest.mark.openeo + def test_job_update_title(self): + new_job = self.make_job( + task_id=self.fully_qualified_test_name(), process=self.process_public.identifier, service=None, + status=Status.CREATED, progress=0, access=Visibility.PUBLIC, + ) + + path = f"/jobs/{new_job.id}" + resp = self.app.get(path, headers=self.json_headers) + assert resp.status_code == 200 + assert "title" not in resp.json + + title = "The new title!" 
+ body = {"title": title} + resp = self.app.patch_json(path, params=body, headers=self.json_headers) + assert resp.status_code == 204 + + resp = self.app.get(path, headers=self.json_headers) + assert resp.status_code == 200 + assert resp.json["title"] == title + + body = {"title": None} + resp = self.app.patch_json(path, params=body, headers=self.json_headers) + assert resp.status_code == 204 + + resp = self.app.get(path, headers=self.json_headers) + assert resp.status_code == 200 + assert "title" not in resp.json + + body = {"title": ""} + resp = self.app.patch_json(path, params=body, headers=self.json_headers, expect_errors=True) + assert resp.status_code == 422 + assert "title.JobTitle" in resp.json["cause"] + + resp = self.app.get(path, headers=self.json_headers) + assert resp.status_code == 200 + assert "title" not in resp.json + @pytest.mark.oap_part4 def test_job_update_response_process_disallowed(self): proc_id = self.fully_qualified_test_name() diff --git a/weaver/datatype.py b/weaver/datatype.py index 22af34259..0f13d739e 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -937,6 +937,18 @@ def type(self): return "process" return "provider" + @property + def title(self): + # type: () -> Optional[str] + return self.get("title", None) + + @title.setter + def title(self, title): + # type: (Optional[str]) -> None + if not (isinstance(title, str) or not title): # disallow empty title as well + raise TypeError(f"Type 'str' or 'None' is required for '{self.__name__}.title'") + self["title"] = title + def _get_inputs(self): # type: () -> ExecutionInputs if self.get("inputs") is None: @@ -1547,6 +1559,7 @@ def json(self, container=None): # pylint: disable=W0221,arguments-differ "processID": self.process, "providerID": self.service, "type": self.type, + "title": self.title, "status": map_status(self.status), "message": self.status_message, "created": self.created, @@ -1577,6 +1590,7 @@ def params(self): "wps_url": self.wps_url, "service": self.service, 
"process": self.process, + "title": self.title, "inputs": self.inputs, "outputs": self.outputs, "user_id": self.user_id, diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index f2a3715fd..dc7d62540 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -1074,7 +1074,7 @@ def validate_job_schema(payload, body_schema=sd.Execute): "type": "InvalidSchema", "title": "Invalid Job Execution Schema", "detail": "Execution body failed schema validation.", - "status": HTTPBadRequest.code, + "status": HTTPUnprocessableEntity.code, "error": ex.msg, "cause": ex.asdict(), "value": repr_json(ex.value), diff --git a/weaver/wps_restapi/colander_extras.py b/weaver/wps_restapi/colander_extras.py index 8c23337e2..2f85b2aac 100644 --- a/weaver/wps_restapi/colander_extras.py +++ b/weaver/wps_restapi/colander_extras.py @@ -598,7 +598,7 @@ def serialize(self, node, appstruct): # noqa raise colander.Invalid( node, colander._( - "${val} cannot be serialized: ${err}", + "${val} cannot be processed: ${err}", mapping={"val": appstruct, "err": "Not 'null'."}, ), ) @@ -788,7 +788,7 @@ class SchemaB(MappingSchema): SchemaB().deserialize({"s1": None, "s2": {"field": "ok"}}) # results: {'s2': {'field': 'ok'}} - .. seealso: + .. 
seealso:: - https://github.com/Pylons/colander/issues/276 - https://github.com/Pylons/colander/issues/299 diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 6952d9b6f..17aa3035d 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -323,6 +323,7 @@ def get_job_status_schema(request): """ def make_headers(resolved_schema): + # type: (JobStatusSchemaType) -> HeadersType content_type = clean_media_type_format(content_accept, strip_parameters=True) content_profile = f"{content_type}; profile={resolved_schema}" content_headers = {"Content-Type": content_profile} diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index b19fbf356..207ecb4e5 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -2194,6 +2194,12 @@ class JobTypeEnum(ExtendedSchemaNode): validator = OneOf(["process", "provider", "service"]) +class JobTitle(ExtendedSchemaNode): + schema_type = String + description = "Title assigned to the job for user-readable identification." + validator = Length(min=1) + + class JobSortEnum(ExtendedSchemaNode): schema_type = String title = "JobSortingMethod" @@ -3747,6 +3753,7 @@ class JobStatusInfo(ExtendedMappingSchema): providerID = ProcessIdentifier(missing=None, default=None, description="Provider identifier corresponding to the job execution.") type = JobTypeEnum(description="Type of the element associated to the creation of this job.") + title = JobTitle(missing=drop) status = JobStatusEnum(description="Last updated status.") message = ExtendedSchemaNode(String(), missing=drop, description="Information about the last status update.") created = ExtendedSchemaNode(DateTime(), missing=drop, default=None, @@ -4240,6 +4247,7 @@ class Execute(ExecuteInputOutputs): "This parameter is required if the process cannot be inferred from the request endpoint." 
), ) + title = JobTitle(missing=drop) status = JobStatusCreate( description=( "Status to request creation of the job without submitting it to processing queue " @@ -6616,9 +6624,19 @@ class PostProcessJobsEndpointXML(PostJobsEndpointXML, LocalProcessPath): pass +class JobTitleNullable(OneOfKeywordSchema): + description = "Job title to update, or unset if 'null'." + _one_of = [ + JobTitle(), + ExtendedSchemaNode(NoneType(), name="null"), + ] + + class PatchJobBodySchema(Execute): description = "Execution request contents to be updated." - # all parameters that are not 'missing=drop' must be added to allow partial update + # allow explicit 'title: null' do unset a predefined title + title = JobTitleNullable(missing=null) # 'null' ensures that, if provided, oneOf evaluates rather than drop invalid + # all parameters that are not 'missing=drop' in original 'Execute' definition must be added to allow partial update inputs = ExecuteInputValues(missing=drop, description="Input values or references to be updated.") outputs = ExecuteOutputSpec(missing=drop, description="Output format and transmission mode to be updated.") From d52d6f33a7b950f6a9db86b6dd4b60453b3cd8ec Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 23 Oct 2024 23:47:06 -0400 Subject: [PATCH 26/33] dismiss invalid linting issues --- tests/test_datatype.py | 4 ++-- weaver/datatype.py | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/test_datatype.py b/tests/test_datatype.py index 667984d84..418d259a2 100644 --- a/tests/test_datatype.py +++ b/tests/test_datatype.py @@ -1,11 +1,11 @@ import uuid from copy import deepcopy - from datetime import datetime, timedelta + import pytest +from visibility import Visibility from tests import resources -from visibility import Visibility from weaver.datatype import Authentication, AuthenticationTypes, DockerAuthentication, Job, Process, Service from weaver.execute import ExecuteControlOption, ExecuteMode, ExecuteResponse, 
ExecuteReturnPreference from weaver.formats import ContentType diff --git a/weaver/datatype.py b/weaver/datatype.py index 0f13d739e..5b79d8f2e 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -319,7 +319,8 @@ def __get__(self, instance, *_): if instance is None: # allow access to the descriptor as class attribute 'getattr(type(instance), property-name)' return self # noqa - if self.fget != self.__get__: # ensure that any 'fget' specified at property creation is employed + # ensure that any 'fget' specified at property creation is employed + if self.fget != self.__get__: # pylint: disable=W0143 return self.fget(instance) dt = instance.get(self.name, None) if not dt: @@ -332,7 +333,8 @@ def __get__(self, instance, *_): def __set__(self, instance, value): # type: (Any, Union[datetime, str]) -> None - if self.fset != self.__set__: # ensure that any 'fset' specified at property creation is employed + # ensure that any 'fset' specified at property creation is employed + if self.fset != self.__set__: # pylint: disable=W0143 return self.fset(instance, value) if isinstance(value, str): value = dt_parse(value) From 7ce3afd99f79c559315a5d16bb7e285d072e61d2 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 24 Oct 2024 00:17:18 -0400 Subject: [PATCH 27/33] more test coverage --- tests/test_execute.py | 52 +++++++++++++++++++++++++++++++++- tests/wps_restapi/test_jobs.py | 5 ++-- 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/tests/test_execute.py b/tests/test_execute.py index fbcc587f8..3696d48ff 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -3,7 +3,14 @@ import pytest from pyramid.httpexceptions import HTTPBadRequest -from weaver.execute import ExecuteControlOption, ExecuteMode, ExecuteReturnPreference, parse_prefer_header_execute_mode +from weaver.datatype import Job +from weaver.execute import ( + ExecuteControlOption, + ExecuteMode, + ExecuteReturnPreference, + parse_prefer_header_execute_mode, + 
update_preference_applied_return_header +) @pytest.mark.parametrize( @@ -102,3 +109,46 @@ def test_parse_prefer_header_execute_mode_invalid(prefer_header): headers = {"Prefer": prefer_header} with pytest.raises(HTTPBadRequest): parse_prefer_header_execute_mode(headers, [ExecuteControlOption.ASYNC]) + + +@pytest.mark.parametrize( + ["job_return", "request_headers", "response_headers", "expect_headers"], + [ + ( + None, + {}, + {}, + {}, + ), + ( + None, + {"Prefer": "respond-async, wait=4"}, + {}, + {}, + ), + ( + None, + {"Prefer": f"return={ExecuteReturnPreference.MINIMAL}"}, + {}, + {"Preference-Applied": f"return={ExecuteReturnPreference.MINIMAL}"}, + ), + ( + ExecuteReturnPreference.MINIMAL, + {"Prefer": f"return={ExecuteReturnPreference.REPRESENTATION}"}, + {}, + {}, + ), + ( + ExecuteReturnPreference.MINIMAL, + {"Prefer": f"return={ExecuteReturnPreference.MINIMAL}"}, + {"Preference-Applied": "respond-async"}, + {"Preference-Applied": f"return={ExecuteReturnPreference.MINIMAL}; respond-async"}, + ), + ] +) +def test_update_preference_applied_return_header(job_return, request_headers, response_headers, expect_headers): + job = Job(task_id="test") + if job_return: + job.execution_return = job_return + update_headers = update_preference_applied_return_header(job, request_headers, response_headers) + assert update_headers == expect_headers diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index fa23033ca..4dac15c5f 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -1826,10 +1826,11 @@ def test_job_run_response(self): raise NotImplementedError # FIXME (https://github.com/crim-ca/weaver/issues/673) @pytest.mark.oap_part4 - def test_job_update_locked(self): + @parameterized.expand([Status.ACCEPTED, Status.RUNNING, Status.FAILED, Status.SUCCEEDED]) + def test_job_update_locked(self, status): new_job = self.make_job( task_id=self.fully_qualified_test_name(), process=self.process_public.identifier, 
service=None, - status=Status.RUNNING, progress=100, access=Visibility.PUBLIC, + status=status, progress=100, access=Visibility.PUBLIC, inputs={"test": "data"}, outputs={"test": {"transmissionMode": ExecuteTransmissionMode.VALUE}}, ) path = f"/jobs/{new_job.id}" From 6fd4ec29fa28fc55740777da145d4fe295a9f2cc Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 24 Oct 2024 00:24:57 -0400 Subject: [PATCH 28/33] patch flaky test --- tests/test_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_utils.py b/tests/test_utils.py index 14281efdf..1154459e6 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -654,6 +654,7 @@ def mock_sleep(delay): assert all(called == expect for called, expect in zip(sleep_counter["called_with"], intervals)) +@pytest.mark.flaky(reruns=2, reruns_delay=1) def test_request_extra_zero_values(): """ Test that zero-value ``retries`` and ``backoff`` are not ignored. From 0f68b98094337398b902da5378ab05cc1aa0026a Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 24 Oct 2024 12:48:05 -0400 Subject: [PATCH 29/33] test more job update combinations --- tests/wps_restapi/test_jobs.py | 102 +++++++++++++++++++++- weaver/processes/execution.py | 76 ++++++++++------ weaver/wps_restapi/swagger_definitions.py | 9 +- 3 files changed, 156 insertions(+), 31 deletions(-) diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index 4dac15c5f..8efdafa42 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -1840,7 +1840,24 @@ def test_job_update_locked(self, status): assert resp.json["type"] == "http://www.opengis.net/def/exceptions/ogcapi-processes-4/1.0/locked" @pytest.mark.oap_part4 - def test_job_update_response(self): + def test_job_update_unsupported_media_type(self): + new_job = self.make_job( + task_id=self.fully_qualified_test_name(), process=self.process_public.identifier, service=None, + status=Status.CREATED, progress=0, 
access=Visibility.PUBLIC, + ) + path = f"/jobs/{new_job.id}" + resp = self.app.patch(path, params="data", expect_errors=True) + assert resp.status_code == 415 + assert resp.content_type == ContentType.APP_JSON + assert resp.json["type"] == ( + "http://www.opengis.net/def/exceptions/ogcapi-processes-4/1.0/unsupported-media-type" + ) + + @pytest.mark.oap_part4 + def test_job_update_response_contents(self): + """ + Validate that :term:`Job` metadata and responses are updated with contents as requested. + """ new_job = self.make_job( task_id=self.fully_qualified_test_name(), process=self.process_public.identifier, service=None, status=Status.CREATED, progress=0, access=Visibility.PUBLIC, @@ -1907,6 +1924,89 @@ def test_job_update_response(self): } } + @pytest.mark.oap_part4 + def test_job_update_execution_parameters(self): + """ + Test modification of the execution ``return`` and ``response`` options, going back-and-forth between approaches. + """ + new_job = self.make_job( + task_id=self.fully_qualified_test_name(), process=self.process_public.identifier, service=None, + status=Status.CREATED, progress=0, access=Visibility.PUBLIC, + execute_mode=ExecuteMode.AUTO, + execute_response=ExecuteResponse.DOCUMENT, + ) + + body = {} + path = f"/jobs/{new_job.id}" + headers = { + "Prefer": f"return={ExecuteReturnPreference.REPRESENTATION}", + } + resp = self.app.patch_json(path, params=body, headers=headers) + assert resp.status_code == 204 + + path = f"/jobs/{new_job.id}/inputs" + resp = self.app.get(path, headers=self.json_headers) + assert resp.status_code == 200 + assert resp.json["mode"] == ExecuteMode.AUTO + assert resp.json["response"] == ExecuteResponse.RAW + assert resp.json["headers"]["Prefer"] == f"return={ExecuteReturnPreference.REPRESENTATION}" + + body = {"response": ExecuteResponse.DOCUMENT} + path = f"/jobs/{new_job.id}" + resp = self.app.patch_json(path, params=body, headers=self.json_headers) + assert resp.status_code == 204 + + path = 
f"/jobs/{new_job.id}/inputs" + resp = self.app.get(path, headers=self.json_headers) + assert resp.status_code == 200 + assert resp.json["mode"] == ExecuteMode.AUTO + assert resp.json["response"] == ExecuteResponse.DOCUMENT + assert resp.json["headers"]["Prefer"] == f"return={ExecuteReturnPreference.MINIMAL}" + + body = {} + headers = { + "Prefer": f"return={ExecuteReturnPreference.REPRESENTATION}", + } + path = f"/jobs/{new_job.id}" + resp = self.app.patch_json(path, params=body, headers=headers) + assert resp.status_code == 204 + + path = f"/jobs/{new_job.id}/inputs" + resp = self.app.get(path, headers=self.json_headers) + assert resp.status_code == 200 + assert resp.json["mode"] == ExecuteMode.AUTO + assert resp.json["response"] == ExecuteResponse.RAW + assert resp.json["headers"]["Prefer"] == f"return={ExecuteReturnPreference.REPRESENTATION}" + + body = {"response": ExecuteResponse.RAW} + path = f"/jobs/{new_job.id}" + resp = self.app.patch_json(path, params=body, headers=self.json_headers) + assert resp.status_code == 204 + + path = f"/jobs/{new_job.id}/inputs" + resp = self.app.get(path, headers=self.json_headers) + assert resp.status_code == 200 + assert resp.json["mode"] == ExecuteMode.AUTO + assert resp.json["response"] == ExecuteResponse.RAW + assert resp.json["headers"]["Prefer"] == f"return={ExecuteReturnPreference.REPRESENTATION}" + + @pytest.mark.oap_part4 + def test_job_update_subscribers(self): + new_job = self.make_job( + task_id=self.fully_qualified_test_name(), process=self.process_public.identifier, service=None, + status=Status.CREATED, progress=0, access=Visibility.PUBLIC, + subscribers={"successUri": "https://example.com/random"}, + ) + + # check that subscribers can be removed even if not communicated in job status responses + body = {"subscribers": {}} + path = f"/jobs/{new_job.id}" + resp = self.app.patch_json(path, params=body, headers=self.json_headers) + assert resp.status_code == 204 + + test_job = self.job_store.fetch_by_id(new_job.id) 
+ assert test_job.subscribers is None + @pytest.mark.oap_part4 @pytest.mark.openeo def test_job_update_title(self): diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index dc7d62540..d47824432 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -15,7 +15,6 @@ HTTPAccepted, HTTPBadRequest, HTTPCreated, - HTTPException, HTTPNotAcceptable, HTTPUnprocessableEntity, HTTPUnsupportedMediaType @@ -951,9 +950,13 @@ def update_job_parameters(job, request): value = field = loc = None job_process = get_process(job.process) + validate_process_id(job_process, body) try: loc = "body" - validate_process_id(job_process, body) + + # used to avoid possible attribute name conflict + # (e.g.: 'job.response' vs 'job.execution_response') + execution_fields = ["response", "mode"] for node in sd.PatchJobBodySchema().children: field = node.name @@ -963,28 +966,44 @@ def update_job_parameters(job, request): continue # will be handled simultaneously after value = body[field] # type: ignore - if node.name in job: + if field not in execution_fields and field in job: setattr(job, field, value) - elif f"execution_{field}" in job: + elif field in execution_fields: field = f"execution_{field}" - if field == "execution_mode" and value in [ExecuteMode.ASYNC, ExecuteMode.SYNC]: - job_ctrl_exec = ExecuteControlOption.get(f"{value}-execute") - if job_ctrl_exec not in job_process.jobControlOptions: - raise HTTPBadRequest( - json=sd.ErrorJsonResponseBodySchema(schema_include=True).deserialize({ - "type": "InvalidJobUpdate", - "title": "Invalid Job Execution Update", - "detail": "Update of job execution mode is not permitted by process jobControlOptions.", - "status": HTTPBadRequest.code, - "cause": {"name": "mode", "in": loc}, - "value": repr_json( - { - "process.jobControlOptions": job_process.jobControlOptions, - "job.mode": job_ctrl_exec, - }, force_string=False - ), - }) - ) + if field == "execution_mode": + if value == ExecuteMode.AUTO: + 
continue # don't override previously set value that resolved with default value by omission + if value in [ExecuteMode.ASYNC, ExecuteMode.SYNC]: + job_ctrl_exec = ExecuteControlOption.get(f"{value}-execute") + if job_ctrl_exec not in job_process.jobControlOptions: + raise HTTPBadRequest( + json=sd.ErrorJsonResponseBodySchema(schema_include=True).deserialize({ + "type": "InvalidJobUpdate", + "title": "Invalid Job Execution Update", + "detail": ( + "Update of the job execution mode is not permitted " + "by supported jobControlOptions of the process description." + ), + "status": HTTPBadRequest.code, + "cause": {"name": "mode", "in": loc}, + "value": repr_json( + { + "process.jobControlOptions": job_process.jobControlOptions, + "job.mode": job_ctrl_exec, + }, force_string=False + ), + }) + ) + + # 'response' will take precedence, but (somewhat) align 'Prefer: return' value to match intention + # they are not 100% compatible because output 'transmissionMode' must be considered when + # resolving 'response', but given both 'response' and 'transmissionMode' override 'Prefer', + # this is an "acceptable" compromise (see docs 'Execution Response' section for more details) + if field == "execution_response": + if value == ExecuteResponse.RAW: + job.execution_return = ExecuteReturnPreference.REPRESENTATION + else: + job.execution_return = ExecuteReturnPreference.MINIMAL setattr(job, field, value) @@ -996,13 +1015,18 @@ def update_job_parameters(job, request): job.subscribers = subscribers # for both 'mode' and 'response' - # if provided both in body and corresponding 'Prefer' header parameter, the body parameter takes precedence + # if provided both in body and corresponding 'Prefer' header parameter, + # the body parameter takes precedence (set in code above) # however, if provided only in header, allow override of the body parameter considered as "higher priority" loc = "header" if ExecuteMode.get(body.get("mode"), default=ExecuteMode.AUTO) == ExecuteMode.AUTO: - mode, 
wait, _ = parse_prefer_header_execute_mode(request.headers, job_process.jobControlOptions) + mode, wait, _ = parse_prefer_header_execute_mode( + request.headers, + job_process.jobControlOptions, + return_auto=True, + ) job.execution_mode = mode - job.execution_wait = wait + job.execution_wait = wait if mode == ExecuteMode.SYNC else job.execution_wait if "response" not in body: job_return = parse_prefer_header_return(request.headers) if job_return: @@ -1012,8 +1036,6 @@ def update_job_parameters(job, request): else: job.execution_response = ExecuteResponse.DOCUMENT - except HTTPException: - raise except ValueError as exc: raise HTTPUnprocessableEntity( json=sd.ErrorJsonResponseBodySchema(schema_include=True).deserialize({ diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index 207ecb4e5..e4975abff 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -6628,14 +6628,17 @@ class JobTitleNullable(OneOfKeywordSchema): description = "Job title to update, or unset if 'null'." _one_of = [ JobTitle(), - ExtendedSchemaNode(NoneType(), name="null"), + ExtendedSchemaNode(NoneType(), name="null"), # allow explicit 'title: null' to unset a predefined title ] class PatchJobBodySchema(Execute): description = "Execution request contents to be updated." 
- # allow explicit 'title: null' do unset a predefined title - title = JobTitleNullable(missing=null) # 'null' ensures that, if provided, oneOf evaluates rather than drop invalid + # 'missing=null' ensures that, if a field is provided with an "empty" definition (JSON null, no-field dict, etc.), + # contents are passed down as is rather than dropping them (what 'missing=drop' would do due to DropableSchemaNode) + # this is to allow "unsetting" any values that could have been defined during job creation or previous updates + title = JobTitleNullable(missing=null) + subscribers = JobExecuteSubscribers(missing=null) # all parameters that are not 'missing=drop' in original 'Execute' definition must be added to allow partial update inputs = ExecuteInputValues(missing=drop, description="Input values or references to be updated.") outputs = ExecuteOutputSpec(missing=drop, description="Output format and transmission mode to be updated.") From f6cab1429b15072a1eb58653015cc046cda2db92 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 24 Oct 2024 19:06:13 -0400 Subject: [PATCH 30/33] add docs about new part 4 support --- docs/examples/job_status_ogcapi.json | 41 +++++++ docs/examples/job_status_wps.xml | 29 +++++ docs/source/package.rst | 4 +- docs/source/processes.rst | 177 ++++++++++++++++++++++++--- docs/source/references.rst | 17 +++ weaver/processes/execution.py | 3 +- 6 files changed, 251 insertions(+), 20 deletions(-) create mode 100644 docs/examples/job_status_ogcapi.json create mode 100644 docs/examples/job_status_wps.xml diff --git a/docs/examples/job_status_ogcapi.json b/docs/examples/job_status_ogcapi.json new file mode 100644 index 000000000..a6b03157d --- /dev/null +++ b/docs/examples/job_status_ogcapi.json @@ -0,0 +1,41 @@ +{ + "$schema": "https://schemas.opengis.net/ogcapi/processes/part1/1.0/openapi/schemas/statusInfo.yaml", + "jobID": "a305ef3e-3220-4d43-b1be-301f5ef13c23", + "processID": "example-process", + "providerID": null, + "type": 
"process", + "status": "succeeded", + "message": "Job succeeded.", + "created": "2024-10-02T14:21:12.380000+00:00", + "started": "2024-10-02T14:21:12.990000+00:00", + "finished": "2024-10-02T14:21:23.629000+00:00", + "updated": "2024-10-02T14:21:23.630000+00:00", + "duration": "00:00:10", + "runningDuration": "PT11S", + "runningSeconds": 10.639, + "percentCompleted": 100, + "progress": 100, + "links": [ + { + "title": "Job status.", + "hreflang": "en-CA", + "href": "https://example.com/processes/download-band-sentinel2-product-safe/jobs/a305ef3e-3220-4d43-b1be-301f5ef13c23", + "type": "application/json", + "rel": "status" + }, + { + "title": "Job monitoring location.", + "hreflang": "en-CA", + "href": "https://example.com/processes/download-band-sentinel2-product-safe/jobs/a305ef3e-3220-4d43-b1be-301f5ef13c23", + "type": "application/json", + "rel": "monitor" + }, + { + "title": "Job results of successful process execution (direct output values mapping).", + "hreflang": "en-CA", + "href": "https://example.com/processes/download-band-sentinel2-product-safe/jobs/a305ef3e-3220-4d43-b1be-301f5ef13c23/results", + "type": "application/json", + "rel": "http://www.opengis.net/def/rel/ogc/1.0/results" + } + ] +} diff --git a/docs/examples/job_status_wps.xml b/docs/examples/job_status_wps.xml new file mode 100644 index 000000000..be1a0d38a --- /dev/null +++ b/docs/examples/job_status_wps.xml @@ -0,0 +1,29 @@ + + + + example-process + + + Package operations complete. 
+ + + + output + output + + + + diff --git a/docs/source/package.rst b/docs/source/package.rst index a41ace285..77557094d 100644 --- a/docs/source/package.rst +++ b/docs/source/package.rst @@ -194,7 +194,7 @@ When the above code is saved in a |jupyter-notebook|_ and committed to a Git rep utility can automatically clone the repository, parse the Python code, extract the :term:`CWL` annotations, and generate the :term:`Application Package` with a :term:`Docker` container containing all of their respective definitions. All of this is accomplished with a single call to obtain a deployable :term:`CWL` in `Weaver`, which can then take over -from the :ref:`Process Deployment ` to obtain an :term:`OGC API - Process` definition. +from the :ref:`Process Deployment ` to obtain an :term:`OGC API - Processes` definition. Jupyter Notebook to CWL Example: NCML to STAC Application ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1408,7 +1408,7 @@ However, the :term:`Vault` approach as potential drawbacks. .. note:: For more details about the :term:`Vault`, refer to sections :ref:`file_vault_inputs`, :ref:`vault_upload`, - and the corresponding capabilities in :term:`cli_example_upload`. + and the corresponding capabilities in :ref:`cli_example_upload`. .. _app_pkg_secret_cwltool: diff --git a/docs/source/processes.rst b/docs/source/processes.rst index 3d204ef04..98d2037ba 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -613,6 +613,11 @@ Execution of a Process (Execute) For backward compatibility, the |exec-req-job|_ request is also supported as alias to the above :term:`OGC API - Processes` compliant endpoint. +.. seealso:: + Alternatively, the |job-exec-req|_ request can also be used to submit a :term:`Job` for later execution, + as well as enabling other advanced :ref:`proc_job_management` capabilities. + See :ref:`proc_op_job_create` for more details. 
+ This section will first describe the basics of this request format (:ref:`proc_exec_body`), and after go into further details for specific use cases and parametrization of various input/output combinations (:ref:`proc_exec_mode`, :ref:`proc_exec_results`, etc.). @@ -1694,7 +1699,7 @@ the ``POST /search`` or the ``POST /collections/dataset-features/search`` could Alternatively, if an array of ``image/tiff; application=geotiff`` was expected by the :term:`Process` while targeting the ``collection`` on a :term:`STAC` server, the |stac-assets|_ matching the requested :term:`Media-Types` could -potentially be retrieved as input for the :term:`Process Execution `. +potentially be retrieved as input for the :ref:`Process Execution `. In summary, the |ogc-api-proc-part3-collection-input|_ offers a lot of flexibility with its resolution compared to the typical :ref:`Input Types ` (i.e.: ``Literal``, ``BoundingBox``, ``Complex``) that must be explicitly @@ -1997,17 +2002,93 @@ of the polling-based method on the :ref:`Job Status ` endpoint o .. seealso:: Refer to the |oas-rtd|_ of the |exec-req|_ request for all available ``subscribers`` properties. +.. _proc_job_management: + +Job Management +================================================== + +This section presents capabilities related to :term:`Job` management. +The endpoints and related operations are defined in a mixture of |ogc-api-proc|_ *Core* requirements, +some official extensions, and further `Weaver`-specific capabilities. + +.. seealso:: + - |ogc-api-proc-part1-spec-html|_ + - |ogc-api-proc-part4|_ + +.. _proc_op_job_create: + +Submitting a Job Creation +--------------------------------------------------------------------- + +.. important:: + All concepts introduced in the :ref:`Execution of a Process ` also apply in this section. + Consider reading the subsections for more specific details. + + This section will only cover *additional* concepts and parameters applicable only for this feature. 
+ +Rather than using the |exec-req|_ request, the |job-exec-req|_ request can be used to submit a :term:`Job`. +When doing so, all parameters typically required for :term:`Process` execution must also be provided, including +any relevant :ref:`proc_exec_body` contents (:term:`I/O`), the desired :ref:`proc_exec_mode`, and +the :ref:`proc_exec_results` options. However, an *additional* ``process`` :term:`URL` in the request body is required, +to indicate which :term:`Process` should be executed by the :term:`Job`. + +The |job-exec-req|_ operation allows interoperability alignment with other execution strategies, such as defined +by the |openeo-api|_ and the |ogc-tb20-gdc|_ *GDC API Profile*. It also opens the door for advanced :term:`Workflow` +definitions from a common :term:`Job` endpoint interface, as described by the |ogc-api-proc-part4|_ extension. + +Furthermore, an optional ``"status": "create"`` request body parameter can be supplied to indicate to the :term:`Job` +that it should remain in *pending* state, until a later :ref:`Job Execution Trigger <proc_op_job_trigger>` is performed +to start its execution. This allows the user to apply any desired :ref:`Job Updates <proc_op_job_update>` or review +the resolved :ref:`proc_op_job_inputs` prior to submitting the :term:`Job`. This acts in contrast to +the *Core* |exec-req|_ operation that *immediately* places the :term:`Job` in queue, locking it from any update. + +.. _proc_op_job_update: + +Updating a Job +--------------------------------------------------------------------- + +The |job-update-req|_ request allows updating the :term:`Job` and its underlying parameters prior to execution. +For this reason, the :term:`Job` is required to be in ``created`` :ref:`Job Status <proc_op_job_status>`, such that +it is pending a :ref:`Job Execution Trigger <proc_op_job_trigger>` before being sent to the worker execution queue. +For any other ``status`` than ``created``, attempts to modify the :term:`Job` will return an *HTTP 423 Locked* error +response. 
+ +Potential parameters that can be updated are: + +- Submitted :term:`Process` ``inputs`` +- Desired ``outputs`` formats and representations, as per :ref:`proc_exec_results` +- Applicable ``headers``, ``response`` and ``mode`` options as per :ref:`proc_exec_mode` +- Additional metadata such as a custom :term:`Job` ``title`` + +After updating the :term:`Job`, the :ref:`Job Status <proc_op_job_status>` and :ref:`Job Inputs <proc_op_job_inputs>` +operations can further be performed to review the *pending* :term:`Job` state. Using all those operations allows the +user to iteratively adjust the :term:`Job` until it is ready for execution, for which +the :ref:`Job Execution Trigger <proc_op_job_trigger>` would then be employed. + +.. _proc_op_job_trigger: + +Triggering Job Execution +--------------------------------------------------------------------- + +The |job-trigger-req|_ request allows submitting a *pending* :term:`Job` to the worker execution queue. Once performed, +the typical :ref:`proc_op_monitor` operation can be employed, until eventual success or failure of the :term:`Job`. + +If the :term:`Job` was already submitted, is already in queue, is actively running, or already finished execution, +this operation will return an *HTTP 423 Locked* error response. + .. _proc_op_job_status: .. _proc_op_status: .. _proc_op_monitor: -Monitoring of a Process Execution (GetStatus) +Monitoring a Job Execution (GetStatus) --------------------------------------------------------------------- Monitoring the execution of a :term:`Job` consists of polling the status ``Location`` provided from the -:ref:`Execute ` operation and verifying the indicated ``status`` for the expected result. -The ``status`` can correspond to any of the value defined by :data:`weaver.status.JOB_STATUS_VALUES` -accordingly to the internal state of the workers processing their execution. +:ref:`Execute ` or :ref:`Trigger <proc_op_job_trigger>` operation and verifying the +indicated ``status`` for the expected result. 
+The ``status`` can correspond to any of the values defined by :class:`weaver.status.Status` +according to the internal state of the workers processing their execution, and the +negotiated :ref:`proc_op_job_status_alt` representation. When targeting a :term:`Job` submitted to a `Weaver` instance, monitoring is usually accomplished through the :term:`OGC API - Processes` endpoint using |status-req|_, which will return a :term:`JSON` body. @@ -2029,21 +2110,58 @@ format is employed according to the chosen location. - Location * - :term:`OGC API - Processes` - :term:`JSON` - - ``{WEAVER_URL}/jobs/{JobUUID}`` + - ``{WEAVER_URL}/jobs/{jobID}`` * - :term:`WPS` - :term:`XML` - - ``{WEAVER_WPS_OUTPUTS}/{JobUUID}.xml`` + - ``{WEAVER_WPS_OUTPUTS}/{jobID}.xml`` .. seealso:: For the :term:`WPS` endpoint, refer to :ref:`conf_settings`. -.. fixme: add example -.. fixme: describe minimum fields and extra fields +Following are examples for both representations. Note that results might vary according to other parameters such +as when using :ref:`proc_op_job_status_alt`, or when different :term:`Process` references or :term:`Workflow` +definitions are involved. + +.. literalinclude:: ../examples/job_status_ogcapi.json + :language: json + :caption: :term:`Job` Status in :term:`JSON` using the :term:`OGC API - Processes` interface + :name: job-status-ogcapi + +.. literalinclude:: ../examples/job_status_wps.xml + :language: xml + :caption: :term:`Job` Status in :term:`XML` using the :term:`WPS` interface + :name: job-status-wps + +.. _proc_op_job_status_alt: + +Alternate Job Status +~~~~~~~~~~~~~~~~~~~~ + +In order to support alternate :term:`Job` status representations, the following approaches can be used when performing +the |status-req|_ request. + +- Specify either a ``profile`` or ``schema`` query parameter (e.g.: ``/jobs/{jobID}?profile=openeo``). +- Specify a ``profile`` parameter within the ``Accept`` header (e.g.: ``Accept: application/json; profile=openeo``).
+ +Using the |openeo|_ profile, for example, will allow returning ``status`` values that are appropriate +as per the |openeo-api|_ definition. + +When performing :ref:`Job Status <proc_op_job_status>` requests, the received response should +contain a ``Content-Schema`` header indicating which ``profile`` is being represented. +This header is employed because multiple ``Content-Type: application/json`` headers are applicable +across multiple :term:`API` implementations and status representations. .. _proc_op_result: +.. _proc_op_job_detail: -Obtaining Job Outputs, Results, Logs or Errors ---------------------------------------------------------------------- +Obtaining Job Details and Metadata +---------------------------------- + +All endpoints to retrieve any of the following information about a :term:`Job` can either be requested directly +(i.e.: ``/jobs/{jobID}/...``) or with equivalent :term:`Provider` and/or :term:`Process` prefixed endpoints, +if the requested :term:`Job` did refer to those :term:`Provider` and/or :term:`Process`. +A *local* :term:`Process` would have its :term:`Job` references as ``/processes/{processId}/jobs/{jobID}/...`` +while a :ref:`proc_remote_provider` will use ``/providers/{providerName}/processes/{processId}/jobs/{jobID}/...``. .. _proc_op_job_outputs: @@ -2219,7 +2337,8 @@ Job Inputs In order to better understand the parameters that were *originally* submitted during :term:`Job` creation, the |inputs-req|_ can be employed. This will return both the data and reference ``inputs`` that were submitted, as well as the *requested* ``outputs`` [#outN]_ to retrieve any relevant ``transmissionMode``, ``format``, etc. -parameters that where specified during submission of the :ref:`proc_exec_body`. +parameters that were specified during submission of the :ref:`proc_exec_body`, and any other relevant ``headers`` +that can affect the :ref:`proc_exec_mode` and :ref:`proc_exec_results`.
For convenience, this endpoint also returns relevant ``links`` applicable for the requested :term:`Job`. .. literalinclude:: ../examples/job_inputs.json @@ -2230,6 +2349,9 @@ For convenience, this endpoint also returns relevant ``links`` applicable for th .. note:: The ``links`` presented above are not an exhaustive list to keep the example relatively small. +If the :term:`Job` is still pending execution, the parameters returned by this endpoint can be modified +using the :ref:`proc_op_job_update` operation before submitting it. + .. _proc_op_job_error: .. _proc_op_job_exceptions: @@ -2278,12 +2400,33 @@ Note again that the more the :term:`Process` is verbose, the more tracking will :caption: Example :term:`JSON` Representation of :term:`Job` Logs Response :name: job-logs +.. _proc_op_job_prov: + +Job Provenance +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. fixme: CWL and Job Prov (https://github.com/crim-ca/weaver/issues/673) +.. todo:: + implement ``GET /jobs/{jobID}/run`` and/or ``GET /jobs/{jobID}/prov`` + (see https://github.com/crim-ca/weaver/issues/673) + +.. _proc_op_job_stats: + +Job Statistics +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + .. note:: - All endpoints to retrieve any of the above information about a :term:`Job` can either be requested directly - (i.e.: ``/jobs/{jobID}/...``) or with equivalent :term:`Provider` and/or :term:`Process` prefixed endpoints, - if the requested :term:`Job` did refer to those :term:`Provider` and/or :term:`Process`. - A *local* :term:`Process` would have its :term:`Job` references as ``/processes/{processId}/jobs/{jobID}/...`` - while a :ref:`proc_remote_provider` will use ``/provider/{providerName}/processes/{processId}/jobs/{jobID}/...``. + This feature is specific to `Weaver`. + +The |job-stats-req|_ request can be performed to obtain runtime statistics from the :term:`Job`. +This content is only available when a :term:`Job` has successfully completed. +Below is a sample of possible response. 
Some parts might be omitted according to the +internal :term:`Application Package` of the :term:`Process` represented by the :term:`Job` execution. + +.. literalinclude:: ../../weaver/wps_restapi/examples/job_statistics.json + :language: json + :caption: Example :term:`JSON` of :term:`Job` Statistics Response + :name: job-statistics .. _vault_upload: diff --git a/docs/source/references.rst b/docs/source/references.rst index a2461a14c..d23bed44e 100644 --- a/docs/source/references.rst +++ b/docs/source/references.rst @@ -149,6 +149,11 @@ .. _ogc-api-proc-part3: https://docs.ogc.org/DRAFTS/21-009.html .. |ogc-api-proc-part3-collection-input| replace:: *Collection Input* .. _ogc-api-proc-part3-collection-input: https://docs.ogc.org/DRAFTS/21-009.html#section_collection_input +.. |ogc-api-proc-part4| replace:: *OGC API - Processes* - Part 4: Job Management +.. fixme: replace by draft location when available +.. _ogc-api-proc-part4: https://github.com/opengeospatial/ogcapi-processes/tree/master/extensions/job_management +.. |ogc-tb20-gdc| replace:: *OGC Testbed-20 - GeoDataCubes* +.. _ogc-tb20-gdc: https://www.ogc.org/initiatives/ogc-testbed-20/ .. |ogc-proc-ext-billing| replace:: *OGC API - Processes* - Billing extension .. _ogc-proc-ext-billing: https://github.com/opengeospatial/ogcapi-processes/tree/master/extensions/billing .. |ogc-proc-ext-quotation| replace:: *OGC API - Processes* - Quotation extension @@ -162,6 +167,10 @@ .. _ONNX-long: `ONNX`_ .. |ONNX| replace:: ONNX .. _ONNX: https://onnx.ai/ +.. |openeo| replace:: openEO +.. _openeo: https://openeo.org/ +.. |openeo-api| replace:: openEO API +.. _openeo-api: https://openeo.org/documentation/1.0/developers/api/reference.html .. |OpenAPI-spec| replace:: OpenAPI Specification .. _OpenAPI-spec: https://spec.openapis.org/oas/v3.1.0 .. |pywps| replace:: PyWPS @@ -246,6 +255,14 @@ .. _exec-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/Processes/paths/~1processes~1{process_id}~1execution/post .. 
|exec-req-job| replace:: ``POST {WEAVER_URL}/processes/{processID}/jobs`` (Execute) .. _exec-req-job: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/Processes%2Fpaths%2F~1processes~1{process_id}~1jobs%2Fpost +.. |job-exec-req| replace:: ``POST {WEAVER_URL}/jobs`` (Create) +.. _job-exec-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/Jobs/paths/~1jobs/post +.. |job-update-req| replace:: ``PATCH {WEAVER_URL}/jobs/{jobID}`` (Update) +.. _job-update-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/Jobs/paths/~1jobs~1{job_id}/patch +.. |job-trigger-req| replace:: ``POST {WEAVER_URL}/jobs/{jobID}/results`` (Trigger) +.. _job-trigger-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/Jobs/paths/~1jobs~1{job_id}~1results/post +.. |job-stats-req| replace:: ``GET {WEAVER_URL}/jobs/{jobID}/statistics`` +.. _job-stats-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/Jobs/paths/~1jobs~1{job_id}~1statistics/get .. |vis-req| replace:: ``PUT {WEAVER_URL}/processes/{processID}/visibility`` (Visibility) .. _vis-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/Processes%2Fpaths%2F~1processes~1%7Bprocess_id%7D~1visibility%2Fput .. |pkg-req| replace:: ``GET {WEAVER_URL}/processes/{processID}/package`` (Package) diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index d47824432..3747364db 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -689,7 +689,8 @@ def submit_job_dispatch_wps(request, process): Dispatch a :term:`XML` request to the relevant :term:`Process` handler using the :term:`WPS` endpoint. Sends the :term:`XML` request to the :term:`WPS` endpoint which knows how to parse it properly. - Execution will end up in the same :func:`submit_job_handler` function as for :term:`OGC API` :term:`JSON` execution. + Execution will end up in the same :func:`submit_job_handler` function as for :term:`OGC API - Processes` + :term:`JSON` execution.
.. warning:: The function assumes that :term:`XML` was pre-validated as present in the :paramref:`request`. From 5c821ca583b8ca56e873dcb78abbeb7f4b51271c Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 25 Oct 2024 13:23:44 -0400 Subject: [PATCH 31/33] add more details to validate Part 4 error type are returned as required --- tests/functional/test_wps_package.py | 78 ++++++++++++++++++++++++++++ tests/wps_restapi/test_api.py | 16 ++++++ weaver/wps_restapi/api.py | 7 ++- weaver/wps_restapi/jobs/jobs.py | 50 +++++++++++++----- 4 files changed, 137 insertions(+), 14 deletions(-) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 033dabda1..9a34fc448 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -5745,6 +5745,84 @@ def test_execute_jobs_create_trigger(self): }, } + @pytest.mark.oap_part4 + def test_execute_jobs_process_not_found(self): + # use non-existing process to ensure this particular situation is handled as well + # a missing process reference must not cause an early "not-found" response + proc = "random-other-process" + proc = self.fully_qualified_test_name(proc) + + exec_content = { + "process": f"https://localhost/processes/{proc}", + "inputs": {"message": "test"} + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = "/jobs" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=self.json_headers, only_local=True) + assert resp.status_code == 404, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" + assert resp.content_type == ContentType.APP_JSON + assert resp.json["type"] == "http://www.opengis.net/def/exceptions/ogcapi-processes-1/1.0/no-such-process" + + @pytest.mark.oap_part4 + def test_execute_jobs_process_malformed_json(self): + exec_content = { + "process": "xyz", + "inputs": {"message": "test"} + } + with 
contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = "/jobs" + resp = mocked_sub_requests(self.app, "post_json", path, timeout=5, + data=exec_content, headers=self.json_headers, only_local=True) + assert resp.status_code == 400, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" + assert resp.content_type == ContentType.APP_JSON + assert resp.json["type"] == "http://www.opengis.net/def/exceptions/ogcapi-processes-1/1.0/no-such-process" + assert resp.json["cause"] == {"in": "body", "process": "xyz"} + + @pytest.mark.oap_part4 + def test_execute_jobs_process_malformed_xml(self): + exec_content = """ + + + + """ + headers = { + "Accept": ContentType.APP_JSON, + "Content-Type": ContentType.APP_XML, + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = "/jobs" + resp = mocked_sub_requests(self.app, "post", path, timeout=5, + data=exec_content, headers=headers, only_local=True) + assert resp.status_code == 400, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" + assert resp.content_type == ContentType.APP_JSON + assert resp.json["type"] == "http://www.opengis.net/def/exceptions/ogcapi-processes-1/1.0/no-such-process" + assert resp.json["cause"] == {"in": "body", "ows:Identifier": None} + + @pytest.mark.oap_part4 + def test_execute_jobs_unsupported_media_type(self): + headers = { + "Accept": ContentType.APP_JSON, + "Content-Type": ContentType.TEXT_PLAIN, + } + with contextlib.ExitStack() as stack: + for mock_exec in mocked_execute_celery(): + stack.enter_context(mock_exec) + path = "/jobs" + resp = mocked_sub_requests(self.app, "post", path, timeout=5, data="", headers=headers, only_local=True) + assert resp.status_code == 415, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}" + assert resp.content_type == ContentType.APP_JSON + assert resp.json["type"] == ( + 
"http://www.opengis.net/def/exceptions/ogcapi-processes-4/1.0/unsupported-media-type" + ) + assert resp.json["cause"] == {"in": "headers", "name": "Content-Type", "value": ContentType.TEXT_PLAIN} + @pytest.mark.functional class WpsPackageAppWithS3BucketTest(WpsConfigBase, ResourcesUtil): diff --git a/tests/wps_restapi/test_api.py b/tests/wps_restapi/test_api.py index 16af9d0b9..061c200f4 100644 --- a/tests/wps_restapi/test_api.py +++ b/tests/wps_restapi/test_api.py @@ -160,6 +160,22 @@ def test_openapi_includes_schema(self): assert "$id" in body["components"]["schemas"]["CWL"] assert body["components"]["schemas"]["CWL"]["$id"] == sd.CWL_SCHEMA_URL + def test_openapi_jobs_create_description(self): + """ + Ensure the correct docstring is picked up by multiple service decorators across view functions. + + .. seealso:: + - :func:`weaver.wps_restapi.jobs.jobs.create_job` + - :func:`weaver.wps_restapi.jobs.jobs.create_job_unsupported_media_type` + """ + resp = self.app.get(sd.openapi_json_service.path, headers=self.json_headers) + assert resp.status_code == 200 + body = resp.json + + for field in ["summary", "description"]: + desc = body["paths"][sd.jobs_service.path]["post"].get(field, "") + assert not desc or "Create a new processing job" in desc + def test_status_unauthorized_and_forbidden(self): """ Validates that 401/403 status codes are correctly handled and that the appropriate one is returned. 
diff --git a/weaver/wps_restapi/api.py b/weaver/wps_restapi/api.py index 0f6abb491..adee8b822 100644 --- a/weaver/wps_restapi/api.py +++ b/weaver/wps_restapi/api.py @@ -1075,9 +1075,14 @@ def format_response_details(response, request): http_headers = get_header("Content-Type", http_response.headers) or [] req_headers = get_header("Accept", request.headers) or [] if any([ContentType.APP_JSON in http_headers, ContentType.APP_JSON in req_headers]): + req_detail = get_request_info(request) + # return the response instead of generate less detailed one if it was already formed with JSON error details + # this can happen when a specific code like 404 triggers a pyramid lookup against other route/view handlers + if isinstance(response, HTTPException) and isinstance(req_detail, dict): + return response body = OWSException.json_formatter(http_response.status, response.message or "", http_response.title, request.environ) - body["detail"] = get_request_info(request) + body["detail"] = req_detail http_response._json = body if http_response.status_code != response.status_code: raise http_response # re-raise if code was fixed diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index ec50c11fe..e5a17e953 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -15,7 +15,7 @@ from weaver import xml_util from weaver.database import get_db from weaver.datatype import Job -from weaver.exceptions import JobNotFound, JobStatisticsNotFound, ProcessNotFound, log_unhandled_exceptions +from weaver.exceptions import JobNotFound, JobStatisticsNotFound, log_unhandled_exceptions from weaver.execute import parse_prefer_header_execute_mode, rebuild_prefer_header from weaver.formats import ( ContentType, @@ -226,6 +226,8 @@ def create_job(request): """ Create a new processing job with advanced management and execution capabilities. 
""" + proc_key = "process" + proc_url = None proc_id = None prov_id = None try: @@ -238,23 +240,17 @@ def create_job(request): prov_parts = prov_url.rsplit("/providers/", 1) prov_id = prov_parts[-1] if len(prov_parts) > 1 else None elif ctype in ContentType.ANY_XML: + proc_key = "ows:Identifier" body_xml = xml_util.fromstring(request.text) - proc_id = body_xml.xpath("ows:Identifier", namespaces=body_xml.getroottree().nsmap)[0].text + proc_id = body_xml.xpath(proc_key, namespaces=body_xml.getroottree().nsmap)[0].text except Exception as exc: - raise ProcessNotFound(json={ + raise HTTPBadRequest(json={ "title": "NoSuchProcess", "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-1/1.0/no-such-process", - "detail": "Process URL or identifier reference missing or invalid.", - "status": ProcessNotFound.code, + "detail": "Process URL or identifier reference could not be parsed.", + "status": HTTPBadRequest.code, + "cause": {"in": "body", proc_key: repr_json(proc_url, force_string=False)} }) from exc - if not proc_id: - raise HTTPUnsupportedMediaType(json={ - "title": "Unsupported Media Type", - "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-4/1.0/unsupported-media-type", - "detail": "Process URL or identifier reference missing or invalid.", - "status": HTTPUnsupportedMediaType.code, - "cause": {"headers": {"Content-Type": ctype}}, - }) if ctype in ContentType.ANY_XML: process = get_process(process_id=proc_id) @@ -268,6 +264,34 @@ def create_job(request): return submit_job(request, ref, process_id=proc_id, tags=["wps-rest", "ogc-api"]) +@sd.jobs_service.post() +def create_job_unsupported_media_type(request): + # type: (PyramidRequest) -> AnyViewResponse + """ + Handle the case where no ``content_type`` was matched for decorated service handlers on :func:`create_job`. 
+ + This operation must be defined with a separate service decorator allowing "any" ``content_type`` because + match by ``content_type`` is performed prior to invoking the applicable decorated view function. + Therefore, using a custom ``error_handler`` on the decorators of :func:`create_job` would never be invoked + since their preconditions would never be encountered. Decorated views that provide a ``content_type`` explicitly + are prioritized. Therefore, this will match any fallback ``content_type`` not already defined by another decorator. + + .. warning:: + It is very important that this is defined after :func:`create_job` such that its docstring is employed for + rendering the :term:`OpenAPI` definition instead of this docstring. + """ + ctype = get_header("Content-Type", request.headers) + return HTTPUnsupportedMediaType( + json={ + "title": "Unsupported Media Type", + "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-4/1.0/unsupported-media-type", + "detail": "Process URL or identifier reference missing or invalid.", + "status": HTTPUnsupportedMediaType.code, + "cause": {"in": "headers", "name": "Content-Type", "value": ctype}, + } + ) + + @sd.process_results_service.post( tags=[sd.TAG_JOBS, sd.TAG_EXECUTE, sd.TAG_RESULTS, sd.TAG_PROCESSES], schema=sd.ProcessJobResultsTriggerExecutionEndpoint(), From 5c3b2940f58be36ff655086d1bd5ea91d9449259 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 25 Oct 2024 15:50:15 -0400 Subject: [PATCH 32/33] fix CLI test depending on return job status response --- tests/functional/test_cli.py | 51 ++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/tests/functional/test_cli.py b/tests/functional/test_cli.py index 955bdb634..19dc75084 100644 --- a/tests/functional/test_cli.py +++ b/tests/functional/test_cli.py @@ -2280,37 +2280,36 @@ def test_job_statistics(self): body = json.loads(text) assert body == job.statistics - def 
test_job_info_wrong_status(self): + @parameterized.expand([ + ("results", Status.FAILED, "JobResultsFailed", True), + ("statistics", Status.FAILED, "NoJobStatistics", True), + ("exceptions", Status.FAILED, repr_json(["failed"], force_string=True, indent=2), False), + ]) + def test_job_info_status_dependant(self, operation, status, expect, expect_error): # results/statistics must be in success status job = self.job_store.save_job(task_id=uuid.uuid4(), process="test-process", access=Visibility.PUBLIC) job.statistics = resources.load_example("job_statistics.json") job.save_log(message="Some info", status=Status.ACCEPTED, errors=ValueError("failed")) job = self.job_store.update_job(job) - - for operation, status, expect in [ - ("results", Status.FAILED, "JobResultsFailed"), - ("statistics", Status.FAILED, "404 Not Found"), - # ("exceptions", Status.SUCCEEDED, "404 Not Found"), # no error, just irrelevant or empty - ]: - job.status = status - job = self.job_store.update_job(job) - lines = mocked_sub_requests( - self.app, run_command, - [ - # "weaver", - operation, - "-u", self.url, - "-j", str(job.id), - "-nL", - ], - trim=False, - entrypoint=weaver_cli, - only_local=True, - expect_error=True, - ) - assert len(lines) - text = "".join(lines) - assert expect in text + job.status = status + job = self.job_store.update_job(job) + lines = mocked_sub_requests( + self.app, run_command, + [ + # "weaver", + operation, + "-u", self.url, + "-j", str(job.id), + "-nL", + ], + trim=False, + entrypoint=weaver_cli, + only_local=True, + expect_error=expect_error, + ) + assert len(lines) + text = "\n".join(lines) + assert expect in text def test_execute_remote_input(self): """ From 0f61ab2b844f07446067246863d67256c788a732 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Wed, 30 Oct 2024 13:51:41 -0400 Subject: [PATCH 33/33] Update references.rst adding published HTML draft for OAP Part 4 --- docs/source/references.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) 
diff --git a/docs/source/references.rst b/docs/source/references.rst index d23bed44e..9b0c3ffec 100644 --- a/docs/source/references.rst +++ b/docs/source/references.rst @@ -150,8 +150,7 @@ .. |ogc-api-proc-part3-collection-input| replace:: *Collection Input* .. _ogc-api-proc-part3-collection-input: https://docs.ogc.org/DRAFTS/21-009.html#section_collection_input .. |ogc-api-proc-part4| replace:: *OGC API - Processes* - Part 4: Job Management -.. fixme: replace by draft location when available -.. _ogc-api-proc-part4: https://github.com/opengeospatial/ogcapi-processes/tree/master/extensions/job_management +.. _ogc-api-proc-part4: https://docs.ogc.org/DRAFTS/24-051.html .. |ogc-tb20-gdc| replace:: *OGC Testbed-20 - GeoDataCubes* .. _ogc-tb20-gdc: https://www.ogc.org/initiatives/ogc-testbed-20/ .. |ogc-proc-ext-billing| replace:: *OGC API - Processes* - Billing extension