From b2f2ddceff60a9e5fec31762856bc814f18970bc Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Silva Date: Mon, 25 Oct 2021 20:07:30 +0100 Subject: [PATCH] Harvester arcgis (#8229) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Bump urllib3 from 1.26.2 to 1.26.3 (#6908) Bumps [urllib3](https://github.com/urllib3/urllib3) from 1.26.2 to 1.26.3. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/1.26.3/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/1.26.2...1.26.3) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Toni * [Fixes #6880] Circle CI upload tests fail irregularly (#6881) * [Fixes #6880] Circle CI upload tests fail irregularly * CircleCI test fix: sometimes expires due to upload timeout in the test environment * - Avoid infinite loop on upload testing * Revert "CircleCI test fix: sometimes expires due to upload timeout in the test environment" This reverts commit 66139fdbf0b7510a9829a3e01254f41782fb7e1d. Co-authored-by: Alessio Fabiani Co-authored-by: afabiani * [Fixes #6914] Remove "add to basket" tool for documents and maps (#6915) * Added malnajdi as contributor * [Fixes #6910] meaningful filename for document download (#6911) * get meaningful document filenames on download * - Strip the extension from the document title before slugifying it (e.g.: image.jpg instead of imagejpg.jpg; a sketch of this idea follows further below) Co-authored-by: afabiani Co-authored-by: Alessio Fabiani * - CircleCI Upload Tests: trying to further reduce the risk of infinite loop on "wait_for_progress" * [Fixes #6916] gsimporter.api.NotFound caused by missing trailing slash at the end of GEOSERVER_LOCATION (#6913) * [Fixes #6916] gsimporter.api.NotFound caused by missing trailing slash at the end of GEOSERVER_LOCATION * [Fixes #6916] unit test for GEOSERVER_LOCATION * Bump django-cors-headers from 3.6.0 to 3.7.0 (#6901) Bumps [django-cors-headers](https://github.com/adamchainz/django-cors-headers) from 3.6.0 to 3.7.0. - [Release notes](https://github.com/adamchainz/django-cors-headers/releases) - [Changelog](https://github.com/adamchainz/django-cors-headers/blob/master/HISTORY.rst) - [Commits](https://github.com/adamchainz/django-cors-headers/compare/3.6.0...3.7.0) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump amqp from 5.0.3 to 5.0.5 (#6905) Bumps [amqp](https://github.com/celery/py-amqp) from 5.0.3 to 5.0.5. - [Release notes](https://github.com/celery/py-amqp/releases) - [Changelog](https://github.com/celery/py-amqp/blob/master/Changelog) - [Commits](https://github.com/celery/py-amqp/compare/v5.0.3...v5.0.5) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump pip from 21.0 to 21.0.1 (#6900) Bumps [pip](https://github.com/pypa/pip) from 21.0 to 21.0.1. - [Release notes](https://github.com/pypa/pip/releases) - [Changelog](https://github.com/pypa/pip/blob/master/NEWS.rst) - [Commits](https://github.com/pypa/pip/compare/21.0...21.0.1) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump coverage from 5.3.1 to 5.4 (#6903) Bumps [coverage](https://github.com/nedbat/coveragepy) from 5.3.1 to 5.4.
- [Release notes](https://github.com/nedbat/coveragepy/releases) - [Changelog](https://github.com/nedbat/coveragepy/blob/master/CHANGES.rst) - [Commits](https://github.com/nedbat/coveragepy/compare/coverage-5.3.1...coverage-5.4) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump pytest from 6.2.1 to 6.2.2 (#6907) Bumps [pytest](https://github.com/pytest-dev/pytest) from 6.2.1 to 6.2.2. - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/master/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/6.2.1...6.2.2) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump djangorestframework-gis from 0.16 to 0.17 (#6902) Bumps [djangorestframework-gis](https://github.com/openwisp/django-rest-framework-gis) from 0.16 to 0.17. - [Release notes](https://github.com/openwisp/django-rest-framework-gis/releases) - [Changelog](https://github.com/openwisp/django-rest-framework-gis/blob/master/CHANGES.rst) - [Commits](https://github.com/openwisp/django-rest-framework-gis/compare/v0.16.0...v0.17.0) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * - Align setup.cfg to requirements.txt * [Fixes #6922][REST API v2] Expose the curated thumbnail URL if it has… (#6923) * [Fixes #6922][REST API v2] Expose the curated thumbnail URL if it has been uploaded * - Add REST APIs test suite to CircleCI * [Fixes #6918] Removal of QGIS support (#6919) * [Cleanup and Refactor] Remove QGIS server backend dependencies * - Fix LGTM issues * allow Basic authenticated requests in LOCKDOWN mode * fix to avoid circular import * flake8 check fix * added tests
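A quick illustration of the #6910 filename fix listed above: the extension is split off before slugification so that it survives intact. This is only a sketch of the idea described in the commit message; the helper name is hypothetical, not GeoNode's actual implementation:

    import os
    from django.utils.text import slugify

    def build_download_filename(title: str) -> str:  # hypothetical helper
        # Slugify only the base name: "image.jpg" -> "image.jpg",
        # not "imagejpg.jpg" (the bug described in #6910).
        base, extension = os.path.splitext(title)
        return f"{slugify(base)}{extension}"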
* - Align to upstream master branch * [Fixes #7945] Ingest harvested layer data to geonode * Improve harvesting session and the admin * fix migration files conflict * Initial work for implementing stoppable harvesting sessions * Implement aborting of harvesting celery tasks Add the `AsynchronousHarvestingSession` model, which is used to implement sessions for both refreshing of a harvester's harvestable resources and for the harvesting of remote resources. Refactor the `admin`, `api` and `tasks` to use this new model. Moved some functions out of the `harvesting.utils` module in order to avoid circular imports * fix tests * fix conflicts * Add a couple more tests * Uncomment `settings.py` line that designates the GeoNode test runner as the one to be used * Add harvesting scheduler task * Improving support for the harvesting scheduler task * Continue implementation of the harvesting scheduler task * Remove the dependency on `django-celery-beat`, as the harvesting does not need it anymore * Revert "Remove the dependency on `django-celery-beat`, as the harvesting does not need it anymore" This reverts commit 8d50cf204fa519ff869c0e2cc0e2a1d66123430d. * Remove reference to the django-celery-beat app in the harvesting models and signals * Implement different strategy for the harvesting scheduler (see the sketch below) - remove django-celery-beat as a dependency - implement a simple harvesting scheduler as a celery task - implement an action for resetting a harvester's status - add (and fix) tests
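The scheduler strategy described above shows up concretely further down in this patch, in `geonode/harvesting/apps.py`: the `post_migrate` hook that created `django_celery_beat` database records is deleted, and the periodic task is instead registered statically with celery beat. In essence (the names below are taken from the patch itself, shown here out of context):

    from django.conf import settings
    from geonode.harvesting import config

    # HarvestingAppConfig.ready() now registers the scheduler directly,
    # with no django-celery-beat database records involved.
    settings.CELERY_BEAT_SCHEDULE['harvesting-scheduler'] = {
        "task": "geonode.harvesting.tasks.harvesting_scheduler",
        "schedule": config.get_setting("HARVESTER_SCHEDULER_FREQUENCY_MINUTES") * 60,
    }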
* Begin implementation of a harvester for current GeoNode version * Continue implementation of a modern harvester for remote GeoNode deployments * Rebase and fix migration conflicts * Fix invalid temporal computation after creating a new harvester * fixing pagination (wip; see the sketch below) * Fix bugs with remote harvested layers * Try out both geonode harvester worker classes and iron out inconsistencies * Remove commented line * Fix tests * Harvester API serializer no longer tries to update harvestable resources upon creation. This makes it behave in a similar way as when a harvester is created through the django admin * Made GeonodeUnifiedHarvesterWorker the default harvester for geonode remotes * Fix bits that were incorrectly merged when rebasing * fix flake8 errors * Remove accidental duplication of code that crept in during conflict resolution
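On the pagination fix: the modern GeoNode API is page-based, while the harvesting framework hands workers a plain resource offset, so `GeonodeCurrentHarvester._get_resource_list_params()` (later in this diff) converts one into the other. A worked example of that conversion, using the patch's own formula and default page size:

    import math

    page_size = 10
    for offset in (0, 10, 25):
        # offset 0 -> page 1, offset 10 -> page 2, offset 25 -> page 3
        current_page = math.floor((offset + page_size) / page_size)
        print(offset, current_page)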
* Update signature of get_resource method in WMS harvester worker * fix migration conflicts * Begin implementation of ArcGIS-related harvester * Continue implementation of ArcGIS-related harvester * Continue implementation of arcgis harvester * Handling harvesting of MapServer layers * Move gxp enumerations from the services package to the layers one * Modify `dataset_detail()` view in order to configure gxp layer for layers that come from ArcGIS REST services * Conclude implementation of ArcGIS harvester * Add some tests * Update code * Implement resource_name_filter for ESRI harvester * Minor changes to keep compatibility with Remote Services * [ArcGIS Harvester] Make sure we can also parse nested services (e.g.: https://pro-ags2.dfs.un.org/arcgis/rest/services/UNMISS/UNMISS_Road_Rehabilitation/MapServer) * [Remote Services] Re-enable ArcGIS Remote Services * Improve WMS harvester based on recent developments * - Better management of the "ll_bbox" and "thumbnail" at the "ResourceManager" level * Remove `name` from the default geonode parameters generated by the base harvester This was a regression. Since GeoNode documents do not have a name, this field must only be added when relevant (e.g. when creating datasets) * [CircleCI] Fix test cases Co-authored-by: Giovanni Allegri Co-authored-by: allyoucanmap Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Toni Co-authored-by: Alessio Fabiani Co-authored-by: afabiani Co-authored-by: Florian Hoedt Co-authored-by: Mohammed Y.
Alnajdi Co-authored-by: biegan Co-authored-by: meomancer --- geonode/base/models.py | 5 +- geonode/harvesting/admin.py | 3 - geonode/harvesting/api/serializers.py | 8 +- geonode/harvesting/apps.py | 34 - geonode/harvesting/config.py | 4 +- geonode/harvesting/harvesters/arcgis.py | 607 ++++++++++++++ geonode/harvesting/harvesters/base.py | 9 +- .../harvesting/harvesters/geonodeharvester.py | 774 +++++++++++++++--- geonode/harvesting/harvesters/wms.py | 43 +- .../0046_alter_harvester_harvester_type.py | 18 + ...7_convert_geonode_harvesters_to_unified.py | 37 + .../0048_alter_harvester_harvester_type.py | 18 + .../0049_alter_harvester_harvester_type.py | 18 + geonode/harvesting/resourcedescriptor.py | 25 +- geonode/harvesting/tasks.py | 2 - geonode/harvesting/tests/factories.py | 1 - .../harvesting/tests/harvesters/geonode.py | 2 +- geonode/harvesting/tests/test_admin.py | 2 +- .../harvesting/tests/test_api_serializers.py | 11 +- .../tests/test_harvester_worker_arcgis.py | 78 ++ .../test_harvester_worker_geonode_legacy.py | 2 +- .../tests/test_harvester_worker_wms.py | 32 + geonode/harvesting/tests/test_tasks.py | 1 - geonode/harvesting/tests/test_utils.py | 56 ++ geonode/layers/enumerations.py | 11 + geonode/layers/migrations/0037_layer_ptype.py | 2 +- geonode/layers/views.py | 24 +- geonode/maps/tests.py | 4 +- geonode/resource/manager.py | 4 - geonode/resource/utils.py | 48 +- geonode/services/enumerations.py | 20 +- geonode/services/forms.py | 4 +- .../migrations/0053_alter_service_type.py | 18 + geonode/services/models.py | 3 +- geonode/services/serviceprocessors/arcgis.py | 40 +- geonode/services/serviceprocessors/handler.py | 6 +- geonode/settings.py | 2 +- geonode/thumbs/tests/test_unit.py | 6 +- geonode/thumbs/utils.py | 16 +- geonode/utils.py | 3 +- 40 files changed, 1707 insertions(+), 294 deletions(-) create mode 100644 geonode/harvesting/harvesters/arcgis.py create mode 100644 geonode/harvesting/migrations/0046_alter_harvester_harvester_type.py create mode 100644 geonode/harvesting/migrations/0047_convert_geonode_harvesters_to_unified.py create mode 100644 geonode/harvesting/migrations/0048_alter_harvester_harvester_type.py create mode 100644 geonode/harvesting/migrations/0049_alter_harvester_harvester_type.py create mode 100644 geonode/harvesting/tests/test_harvester_worker_arcgis.py create mode 100644 geonode/harvesting/tests/test_harvester_worker_wms.py create mode 100644 geonode/harvesting/tests/test_utils.py create mode 100644 geonode/services/migrations/0053_alter_service_type.py diff --git a/geonode/base/models.py b/geonode/base/models.py index ddde9644867..8fea7697b09 100644 --- a/geonode/base/models.py +++ b/geonode/base/models.py @@ -68,6 +68,7 @@ from geonode.utils import ( bbox_to_wkt, find_by_attr, + bbox_to_projection, is_monochromatic_image) from geonode.groups.models import GroupProfile from geonode.security.utils import get_visible_resources, get_geoapp_subtypes @@ -1426,7 +1427,9 @@ def set_bbox_polygon(self, bbox, srid): match = re.match(r'^(EPSG:)?(?P\d{4,6})$', str(srid)) bbox_polygon.srid = int(match.group('srid')) if match else 4326 try: - self.ll_bbox_polygon = bbox_polygon.transform(4326, clone=True) + # self.ll_bbox_polygon = bbox_polygon.transform(4326, clone=True) + self.ll_bbox_polygon = Polygon.from_bbox( + bbox_to_projection(list(bbox_polygon.extent) + [srid])[:-1]) except Exception as e: logger.error(e) self.ll_bbox_polygon = bbox_polygon diff --git a/geonode/harvesting/admin.py b/geonode/harvesting/admin.py index 613eb931a3b..93e5003aa1d 100644 
--- a/geonode/harvesting/admin.py +++ b/geonode/harvesting/admin.py @@ -334,7 +334,6 @@ class HarvestableResourceAdmin(admin.ModelAdmin): "last_harvested", "unique_identifier", "title", - "abstract", "show_link_to_harvester", "should_be_harvested", "remote_resource_type", @@ -342,7 +341,6 @@ class HarvestableResourceAdmin(admin.ModelAdmin): readonly_fields = ( "unique_identifier", "title", - "abstract", "harvester", "last_updated", "last_refreshed", @@ -360,7 +358,6 @@ class HarvestableResourceAdmin(admin.ModelAdmin): ) search_fields = ( "title", - "abstract", ) list_editable = ( "should_be_harvested", diff --git a/geonode/harvesting/api/serializers.py b/geonode/harvesting/api/serializers.py index 4b999f6da37..ed3ddb41fae 100644 --- a/geonode/harvesting/api/serializers.py +++ b/geonode/harvesting/api/serializers.py @@ -162,13 +162,7 @@ def create(self, validated_data): f"Either omit it or provide a " f"value of {models.Harvester.STATUS_READY!r}" ) - harvester = super().create(validated_data) - available = harvester.update_availability() - if available: - harvester.status = harvester.STATUS_UPDATING_HARVESTABLE_RESOURCES - harvester.save() - tasks.update_harvestable_resources.apply_async(args=(harvester.pk,)) - return harvester + return super().create(validated_data) def update(self, instance: models.Harvester, validated_data): """Update harvester and perform any required business logic as a side-effect. diff --git a/geonode/harvesting/apps.py b/geonode/harvesting/apps.py index 8dfc2c3f2c1..4d6cde9a3b6 100644 --- a/geonode/harvesting/apps.py +++ b/geonode/harvesting/apps.py @@ -20,43 +20,10 @@ from django.apps import AppConfig from django.conf import settings from django.conf.urls import url, include -from django.db.models.signals import post_migrate from . 
import config -def run_setup_hooks(sender, **kwargs): - from django.utils import timezone - - # Initialize periodic tasks - if 'django_celery_beat' in settings.INSTALLED_APPS and \ - getattr(settings, 'CELERY_BEAT_SCHEDULER', None) == 'django_celery_beat.schedulers:DatabaseScheduler': - from django_celery_beat.models import ( - IntervalSchedule, - PeriodicTask, - ) - - secs = config.get_setting("HARVESTER_SCHEDULER_FREQUENCY_MINUTES") * 60 - check_intervals = IntervalSchedule.objects.filter(every=secs, period="seconds") - if not check_intervals.exists(): - check_interval, _ = IntervalSchedule.objects.get_or_create( - every=secs, - period="seconds" - ) - else: - check_interval = check_intervals.first() - - PeriodicTask.objects.update_or_create( - name="harvesting-scheduler", - defaults=dict( - task="geonode.harvesting.tasks.harvesting_scheduler", - interval=check_interval, - args='', - start_time=timezone.now() - ) - ) - - class HarvestingAppConfig(AppConfig): name = "geonode.harvesting" @@ -67,7 +34,6 @@ def ready(self): urlpatterns += [ url(r'^api/v2/', include('geonode.harvesting.api.urls')) ] - post_migrate.connect(run_setup_hooks, sender=self) settings.CELERY_BEAT_SCHEDULE['harvesting-scheduler'] = { "task": "geonode.harvesting.tasks.harvesting_scheduler", "schedule": config.get_setting("HARVESTER_SCHEDULER_FREQUENCY_MINUTES") * 60, diff --git a/geonode/harvesting/config.py b/geonode/harvesting/config.py index 8bcd014d68c..24a4d7ab57f 100644 --- a/geonode/harvesting/config.py +++ b/geonode/harvesting/config.py @@ -29,9 +29,9 @@ _DEFAULT_HARVESTERS: typing.Final = [ - "geonode.harvesting.harvesters.geonodeharvester.GeonodeLegacyHarvester", + "geonode.harvesting.harvesters.geonodeharvester.GeonodeUnifiedHarvesterWorker", "geonode.harvesting.harvesters.wms.OgcWmsHarvester", - # "geonode.harvesting.harvesters.geonode.GeonodeCswHarvester", + "geonode.harvesting.harvesters.arcgis.ArcgisHarvesterWorker", ] diff --git a/geonode/harvesting/harvesters/arcgis.py b/geonode/harvesting/harvesters/arcgis.py new file mode 100644 index 00000000000..109e64ad997 --- /dev/null +++ b/geonode/harvesting/harvesters/arcgis.py @@ -0,0 +1,607 @@ +######################################################################### +# +# Copyright (C) 2021 OSGeo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +######################################################################### + +"""Harvesters for ArcGIS based remote servers.""" +import re +import abc +import enum +import json +import logging +import typing +import uuid +from urllib.error import ( + HTTPError, + URLError, +) +import urllib.parse + +import arcrest +import requests +from django.contrib.gis import geos +from django.template.defaultfilters import slugify + +from geonode.layers.enumerations import GXP_PTYPES +from geonode.layers.models import Dataset + +from .. import ( + models, + resourcedescriptor, +) + +from . 
import base + +logger = logging.getLogger(__name__) + + +class ArcgisRestApiLayerType(enum.Enum): + GROUP_LAYER = "Group Layer" + FEATURE_LAYER = "Feature Layer" + + +class ArcgisServiceType(enum.Enum): + MAP_SERVICE = "MapServer" + FEATURE_SERVICE = "FeatureServer" + GEOCODE_SERVICE = "GeocodeServer" + GEOPROCESSING_SERVICE = "GPServer" + GEOMETRY_SERVICE = "GeometryServer" + IMAGE_SERVICE = "ImageServer" + NETWORK_SERVICE = "NAServer" + GEODATA_SERVICE = "GeoDataServer" + GLOBE_SERVICE = "GlobeServer" + MOBILE_SERVICE = "MobileServer" + + @classmethod + def has_value(cls, value): + return value in cls._value2member_map_ + + +def parse_remote_url(url: str) -> typing.Tuple[str, typing.Optional[str], typing.Optional[str]]: + """Parse the input url into the ArcGIS REST catalog URL and any service name.""" + url_fragments = url.partition("/rest/services") + catalog_url = "".join(url_fragments[:2]) + service_type = None + possible_service_name = None + service_type_regex = re.match(r'.*\/(.*Server).*', "".join(url_fragments[-1:])) + if service_type_regex: + for service_type_value in service_type_regex.groups(): + if ArcgisServiceType.has_value(service_type_value): + service_type = service_type_value + possible_service_name = "".join(url_fragments[-1:]).strip("/").partition(service_type)[0].rstrip("/") + other = None + break + else: + possible_service_name, other = url_fragments[-1].strip("/").partition("/")[::2] + if possible_service_name is not None and possible_service_name != "": + service_name = possible_service_name + if not service_type and other: + service_type = other.partition("/")[0] + else: + service_name = None + + return catalog_url, service_name, service_type + + +class ArcgisServiceResourceExtractor(abc.ABC): + """Abstract base class with the methods that must be reimplemented + in order to add support for additional ArcGIS REST services""" + + resource_name_filter: typing.Optional[str] + service: typing.Type + + def __init__(self, service, resource_name_filter: typing.Optional[str] = None): + self.service = service + self.resource_name_filter = resource_name_filter + + @abc.abstractmethod + def get_num_resources(self) -> int: + """Return the number of resources that can be extracted from the service.""" + + @abc.abstractmethod + def list_resources(self) -> typing.List[base.BriefRemoteResource]: + """Return a list of BriefRemoteResource with the resources exposed by the service""" + + @abc.abstractmethod + def get_resource( + self, + harvestable_resource: models.HarvestableResource + ) -> base.HarvestedResourceInfo: + """Parse the remote resource into a HarvestedResourceInfo""" + + def _is_relevant_layer(self, layer_name: str) -> bool: + result = False + if self.resource_name_filter is not None: + if self.resource_name_filter.lower() in layer_name.lower(): + result = True + else: + result = True + return result + + +class ArcgisMapServiceResourceExtractor(ArcgisServiceResourceExtractor): + service: arcrest.MapService + http_session: requests.Session + _cached_resources: typing.Optional[typing.List[base.BriefRemoteResource]] + + def __init__(self, service: arcrest.MapService): + super().__init__(service) + self.http_session = requests.Session() + self._cached_resources = None + + def get_num_resources(self) -> int: + if self._cached_resources is None: + self._cached_resources = self._extract_resources() + return len(self._cached_resources) + + def list_resources( + self, + ) -> typing.List[base.BriefRemoteResource]: + if self._cached_resources is None: + self._cached_resources = 
self._extract_resources() + return self._cached_resources + + def get_resource(self, harvestable_resource: models.HarvestableResource): + response = self.http_session.get( + harvestable_resource.unique_identifier, + params={"f": "json"} + ) + result = None + if response.status_code == requests.codes.ok: + try: + response_payload = response.json() + except json.JSONDecodeError: + logger.exception("Could not decode response payload as valid JSON") + else: + resource_descriptor = self._get_resource_descriptor(response_payload, harvestable_resource) + result = base.HarvestedResourceInfo( + resource_descriptor=resource_descriptor, + additional_information=None + ) + else: + logger.error( + f"Could not retrieve remote resource with unique " + f"identifier {harvestable_resource.unique_identifier!r}" + ) + return result + + def _extract_resources(self) -> typing.List[base.BriefRemoteResource]: + result = [] + try: + for arc_layer in self.service.layers: + if self._is_relevant_layer(arc_layer.name) and arc_layer.type != ArcgisRestApiLayerType.GROUP_LAYER.value: + result.append(self._parse_brief_layer(arc_layer)) + result.extend(self._list_sub_layers(arc_layer)) + except HTTPError: + logger.exception(msg="Could not list resources") + return result + + def _list_sub_layers(self, arc_layer: arcrest.MapLayer) -> typing.List[base.BriefRemoteResource]: + result = [] + for sub_layer in arc_layer.subLayers: + if self._is_relevant_layer(sub_layer.name) and sub_layer.type != ArcgisRestApiLayerType.GROUP_LAYER.value: + result.append(self._parse_brief_layer(sub_layer)) + result.extend(self._list_sub_layers(sub_layer)) + return result + + def _get_resource_descriptor( + self, + layer_representation: typing.Dict, + harvestable_resource: models.HarvestableResource + ) -> resourcedescriptor.RecordDescription: + if harvestable_resource.geonode_resource is None: + resource_uuid = uuid.uuid4() + else: + resource_uuid = uuid.UUID(harvestable_resource.geonode_resource.uuid) + name = layer_representation["name"] + _, service_name, service_type = parse_remote_url(harvestable_resource.unique_identifier) + alternate = slugify(" ".join((service_name, name, str(layer_representation["id"])))) + epsg_code, spatial_extent = _parse_spatial_extent(layer_representation["extent"]) + store = self.service.url.partition("?")[0].strip("/") + return resourcedescriptor.RecordDescription( + uuid=resource_uuid, + identification=resourcedescriptor.RecordIdentification( + name=name, + title=name, + abstract=layer_representation.get("description", ""), + other_constraints=layer_representation.get("copyrightText", ""), + spatial_extent=spatial_extent, + other_keywords=[ + "ESRI", + f"ArcGIS REST {self.service.__service_type__}", + ] + ), + distribution=resourcedescriptor.RecordDistribution( + link_url=harvestable_resource.unique_identifier, + thumbnail_url=None, + ), + reference_systems=[epsg_code], + additional_parameters={ + "alternate": alternate, + "store": store, + "typename": slugify(f"{layer_representation['id']}-{''.join(c for c in name if ord(c) < 128)}"), + "workspace": "remoteWorkspace", + "ows_url": harvestable_resource.unique_identifier.rpartition("/")[0], + "ptype": GXP_PTYPES["REST_MAP"], + }, + ) + + def _parse_brief_layer(self, arc_layer: arcrest.MapLayer) -> base.BriefRemoteResource: + base_url = urllib.parse.urlparse(self.service.url) + layer_path = "/".join((base_url.path.rstrip("/"), str(arc_layer.id))) + layer_url = urllib.parse.urlunparse( + (base_url.scheme, base_url.netloc, layer_path, "", "", "")) + return
base.BriefRemoteResource( + unique_identifier=layer_url, + title=arc_layer.name, + resource_type=arc_layer.type, + ) + + class ArcgisImageServiceResourceExtractor(ArcgisServiceResourceExtractor): + service: arcrest.ImageService + http_session: requests.Session + + def __init__(self, service: arcrest.ImageService): + super().__init__(service) + self.http_session = requests.Session() + + def get_num_resources(self) -> int: + return len(self.list_resources()) + + def list_resources(self) -> typing.List[base.BriefRemoteResource]: + name = self._get_resource_name() + if self._is_relevant_layer(name): + unique_id = self.service.url.rpartition("?")[0].rstrip("/") + result = [ + base.BriefRemoteResource( + unique_identifier=unique_id, + title=name, + resource_type="raster", + ) + ] + else: + result = [] + return result + + def get_resource( + self, + harvestable_resource: models.HarvestableResource + ) -> base.HarvestedResourceInfo: + response = self.http_session.get( + harvestable_resource.unique_identifier, + params={"f": "json"} + ) + result = None + if response.status_code == requests.codes.ok: + try: + response_payload = response.json() + except json.JSONDecodeError: + logger.exception("Could not decode response payload as valid JSON") + else: + resource_descriptor = self._get_resource_descriptor( + response_payload, harvestable_resource) + result = base.HarvestedResourceInfo( + resource_descriptor=resource_descriptor, + additional_information=None + ) + else: + logger.error( + f"Could not retrieve remote resource with unique " + f"identifier {harvestable_resource.unique_identifier!r}" + ) + return result + + def _get_resource_name(self): + return self.service.url.rpartition("/rest/services/")[-1].partition("/ImageServer")[0] + + def _get_resource_descriptor( + self, + layer_representation: typing.Dict, + harvestable_resource: models.HarvestableResource + ) -> resourcedescriptor.RecordDescription: + if harvestable_resource.geonode_resource is None: + resource_uuid = uuid.uuid4() + else: + resource_uuid = uuid.UUID(harvestable_resource.geonode_resource.uuid) + name = layer_representation["name"] + _, service_name, service_type = parse_remote_url(harvestable_resource.unique_identifier) + alternate = "-".join((service_name, name)) + epsg_code, spatial_extent = _parse_spatial_extent(layer_representation["extent"]) + ows_url = harvestable_resource.unique_identifier.rpartition("/")[0] + return resourcedescriptor.RecordDescription( + uuid=resource_uuid, + identification=resourcedescriptor.RecordIdentification( + name=name, + title=name, + abstract=layer_representation.get("description", ""), + other_constraints=layer_representation.get("copyrightText", ""), + spatial_extent=spatial_extent, + other_keywords=[ + "ESRI", + f"ArcGIS REST {self.service.__service_type__}", + ] + ), + distribution=resourcedescriptor.RecordDistribution( + link_url=harvestable_resource.unique_identifier, + thumbnail_url=None, + ), + reference_systems=[epsg_code], + additional_parameters={ + "alternate": alternate, + "store": ows_url, + "typename": slugify(''.join(c for c in name if ord(c) < 128)), + "workspace": "remoteWorkspace", + "ows_url": ows_url, + "ptype": GXP_PTYPES["REST_IMG"], + }, + ) + + +def get_resource_extractor( + resource_unique_identifier: str +) -> typing.Optional[ArcgisServiceResourceExtractor]: + """A factory for instantiating the correct extractor for the resource""" + service_type_name = parse_remote_url(resource_unique_identifier)[-1] + service_type = ArcgisServiceType(service_type_name) + if
service_type == ArcgisServiceType.MAP_SERVICE: + service = arcrest.MapService(resource_unique_identifier) + result = ArcgisMapServiceResourceExtractor(service) + elif service_type == ArcgisServiceType.IMAGE_SERVICE: + service = arcrest.ImageService(resource_unique_identifier) + result = ArcgisImageServiceResourceExtractor(service) + else: + logger.error(f"Unsupported ArcGIS REST service {service_type!r}") + result = None + return result + + class ArcgisHarvesterWorker(base.BaseHarvesterWorker): + harvest_map_services: bool + harvest_image_services: bool + resource_name_filter: typing.Optional[str] + service_names_filter: typing.Optional[typing.List[str]] + + http_session: requests.Session + _arc_catalog: typing.Optional[arcrest.Catalog] + _relevant_service_extractors: typing.Optional[ + typing.List[ + typing.Union[ArcgisMapServiceResourceExtractor, ArcgisImageServiceResourceExtractor] + ] + ] + _supported_service_types = { + ArcgisServiceType.MAP_SERVICE: ArcgisMapServiceResourceExtractor, + ArcgisServiceType.IMAGE_SERVICE: ArcgisImageServiceResourceExtractor, + } + + def __init__( + self, + remote_url: str, + harvester_id: int, + harvest_map_services: bool = True, + harvest_image_services: bool = True, + resource_name_filter: typing.Optional[str] = None, + service_names_filter: typing.Optional[typing.List[str]] = None + ) -> None: + catalog_url, service_name, service_type_name = parse_remote_url(remote_url) + if service_name is not None: + names_filter = [service_name] + (service_names_filter or []) + service_type = ArcgisServiceType(service_type_name) + harvest_maps = (service_type == ArcgisServiceType.MAP_SERVICE) or harvest_map_services + harvest_images = (service_type == ArcgisServiceType.IMAGE_SERVICE) or harvest_image_services + else: + names_filter = service_names_filter or [] + harvest_maps = harvest_map_services + harvest_images = harvest_image_services + super().__init__(catalog_url, harvester_id) + self.http_session = requests.Session() + self.harvest_map_services = harvest_maps + self.harvest_image_services = harvest_images + self.resource_name_filter = resource_name_filter + self.service_names_filter = names_filter + self._arc_catalog = None + self._relevant_service_extractors = None + + @property + def allows_copying_resources(self) -> bool: + return False + + @property + def arc_catalog(self): + if self._arc_catalog is None: + try: + self._arc_catalog = arcrest.Catalog(self.remote_url) + except (json.JSONDecodeError, URLError, HTTPError): + logger.exception(f"Could not connect to ArcGIS REST server at {self.remote_url!r}") + return self._arc_catalog + + @classmethod + def from_django_record(cls, harvester: "Harvester"): # noqa + return cls( + remote_url=harvester.remote_url, + harvester_id=harvester.pk, + harvest_map_services=harvester.harvester_type_specific_configuration.get( + "harvest_map_services", True), + harvest_image_services=harvester.harvester_type_specific_configuration.get( + "harvest_image_services", True), + resource_name_filter=harvester.harvester_type_specific_configuration.get( + "resource_name_filter"), + service_names_filter=harvester.harvester_type_specific_configuration.get( + "service_names_filter" + ) + ) + + @classmethod + def get_extra_config_schema(cls) -> typing.Optional[typing.Dict]: + return { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": ( + "https://geonode.org/harvesting/geonode-arcgis-rest-harvester.schema.json"), + "title": "ArcGIS REST harvester config", + "description": ( + "A jsonschema for validating
configuration option for GeoNode's " + "remote ArcGIS REST services harvester" + ), + "type": "object", + "properties": { + "harvest_map_services": { + "type": "boolean", + "default": True + }, + "harvest_image_services": { + "type": "boolean", + "default": True + }, + "resource_name_filter": { + "type": "string", + }, + "service_names_filter": { + "type": "array", + "items": { + "type": "string", + } + }, + }, + "additionalProperties": False, + } + + def get_num_available_resources(self) -> int: + result = 0 + for service_extractor in self._get_relevant_services(): + result += service_extractor.get_num_resources() + return result + + def list_resources( + self, + offset: typing.Optional[int] = 0 + ) -> typing.List[base.BriefRemoteResource]: + result = [] + # NOTE: Since ArcGIS REST services work in a nested fashion we are + # not able to paginate the underlying results. As such, we resort to + # processing all resources sequentially. This means we only care about + # `offset=0` and explicitly return an empty list when the supplied + # offset is different. + if offset == 0: + for service_extractor in self._get_relevant_services(): + result.extend(service_extractor.list_resources()) + return result + + def check_availability(self, timeout_seconds: typing.Optional[int] = 5) -> bool: + return self.arc_catalog is not None + + def get_geonode_resource_type(self, remote_resource_type: str) -> typing.Type: + return Dataset + + def get_geonode_resource_defaults( + self, + harvested_info: base.HarvestedResourceInfo, + harvestable_resource: models.HarvestableResource, + ) -> typing.Dict: + defaults = super().get_geonode_resource_defaults(harvested_info, harvestable_resource) + defaults["name"] = harvested_info.resource_descriptor.identification.name + defaults.update(harvested_info.resource_descriptor.additional_parameters) + return defaults + + def get_resource( + self, + harvestable_resource: models.HarvestableResource, + ) -> typing.Optional[base.HarvestedResourceInfo]: + extractor = get_resource_extractor(harvestable_resource.unique_identifier) + extractor.resource_name_filter = self.resource_name_filter + return extractor.get_resource(harvestable_resource) + + def _get_extractor_class(self, service_type: ArcgisServiceType) -> typing.Optional[typing.Type]: + if service_type == ArcgisServiceType.MAP_SERVICE and self.harvest_map_services: + result = ArcgisMapServiceResourceExtractor + elif service_type == ArcgisServiceType.IMAGE_SERVICE and self.harvest_image_services: + result = ArcgisImageServiceResourceExtractor + else: + result = None + return result + + def _get_service_extractors(self, service) -> typing.List: + # This method is fugly. Unfortunately, when multiple services share the + # same name, arcrest just instantiates an `AmbiguousService` instance and + # shoves the concrete services as attributes of this instance. + # To make matters more unpleasant, the arcrest `AmbiguousService` class is + # defined inside the `__getitem__` method of another class, so it cannot be + # imported outside of it. 
Thus we resort to checking if there is a + `__service_type__` attribute on the service in order to deduce whether this is a + legit service or an ambiguous one and then deal with it. + result = [] + if not hasattr(service, "__service_type__"): + # this is an arcrest AmbiguousService instance + for sub_service_type in service.__dict__.keys(): + try: + type_ = ArcgisServiceType(sub_service_type) + except ValueError: + logger.debug(f"Unrecognized service type: {sub_service_type!r}") + continue + else: + extractor_class = self._get_extractor_class(type_) + if extractor_class is not None: + sub_service = getattr(service, sub_service_type) + extractor = extractor_class(sub_service) + extractor.resource_name_filter = self.resource_name_filter + result.append(extractor) + else: + try: + type_ = ArcgisServiceType(service.__service_type__) + except ValueError: + logger.debug(f"Unrecognized service type: {service.__service_type__!r}") + else: + extractor_class = self._get_extractor_class(type_) + if extractor_class is not None: + extractor = extractor_class(service) + extractor.resource_name_filter = self.resource_name_filter + result.append(extractor) + return result + + def _get_relevant_services(self) -> typing.List[ + typing.Union[ + ArcgisMapServiceResourceExtractor, + ArcgisImageServiceResourceExtractor + ] + ]: + if self._relevant_service_extractors is None: + result = [] + relevant_service_names = self.service_names_filter or self.arc_catalog.servicenames + for service_name in relevant_service_names: + service = None + for _folder in service_name.split('/'): + if not service: + service = self.arc_catalog[_folder] + else: + service = service[_folder] + extractors = self._get_service_extractors(service) + result.extend(extractors) + self._relevant_service_extractors = result + return self._relevant_service_extractors + + +def _parse_spatial_extent(raw_extent: typing.Dict) -> typing.Tuple[str, geos.Polygon]: + spatial_reference = raw_extent.get("spatialReference", {}) + epsg_code = f"EPSG:{spatial_reference.get('latestWkid', spatial_reference.get('wkid'))}" + extent = geos.Polygon.from_bbox( + ( + raw_extent["xmin"], + raw_extent["ymin"], + raw_extent["xmax"], + raw_extent["ymax"] + ) + ) + return epsg_code, extent diff --git a/geonode/harvesting/harvesters/base.py b/geonode/harvesting/harvesters/base.py index 175f9b65af1..93dbb3eea21 100644 --- a/geonode/harvesting/harvesters/base.py +++ b/geonode/harvesting/harvesters/base.py @@ -51,8 +51,8 @@ class HarvestingException(Exception): class BriefRemoteResource: unique_identifier: str title: str - abstract: str resource_type: str + abstract: typing.Optional[str] = "" should_be_harvested: bool = False @@ -208,7 +208,7 @@ def copy_resource( """ - url = harvested_resource_info.resource_descriptor.distribution.original_format_url + url = harvested_resource_info.resource_descriptor.distribution.download_url result = None if url is not None: target_name = _get_file_name(harvested_resource_info) @@ -252,11 +252,6 @@ def get_geonode_resource_defaults( "files": [str(path) for path in harvested_info.copied_resources], "thumbnail_url": harvested_info.resource_descriptor.distribution.thumbnail_url } - - if harvestable_resource.remote_resource_type in ('layers', 'datasets'): - defaults["name"] = harvested_info.resource_descriptor.identification.name - defaults["ows_url"] = harvested_info.resource_descriptor.distribution.wms_url - if self.should_copy_resource(harvestable_resource): defaults["sourcetype"] = enumerations.SOURCE_TYPE_COPYREMOTE else: diff --git
a/geonode/harvesting/harvesters/geonodeharvester.py b/geonode/harvesting/harvesters/geonodeharvester.py index a29934ced7d..d80319724ec 100644 --- a/geonode/harvesting/harvesters/geonodeharvester.py +++ b/geonode/harvesting/harvesters/geonodeharvester.py @@ -17,17 +17,17 @@ # ######################################################################### -"""Harvester for legacy GeoNode remote servers""" +"""Harvesters for GeoNode remote servers.""" import datetime as dt import enum import json import logging +import math import typing import urllib.parse import uuid -import datetime import dateutil.parser import requests from django.contrib.gis import geos @@ -66,7 +66,396 @@ class GeoNodeResourceType(enum.Enum): MAP = "maps" +class GeoNodeResourceTypeCurrent(enum.Enum): + DOCUMENT = "document" + DATASET = "dataset" + + +class GeonodeCurrentHarvester(base.BaseHarvesterWorker): + """A harvester for modern (v3.2+) GeoNode versions. + + GeoNode versions above 3.2 introduced the concept of `datasets` to replace the older + `layers` concept. The API also has some significant differences. + + """ + + harvest_documents: bool + harvest_datasets: bool + + # harvesting of maps is explicitly disabled - the GeoNode API does not + # really allow reconstructing a Map via API, as there is no information + # about the actual contents of the map, i.e. which layers are contained + # in it + harvest_maps: bool = False + + copy_documents: bool + copy_datasets: bool + resource_title_filter: typing.Optional[str] + start_date_filter: typing.Optional[str] + end_date_filter: typing.Optional[str] + keywords_filter: typing.Optional[typing.List[str]] + categories_filter: typing.Optional[typing.List[str]] + http_session: requests.Session + page_size: int = 10 + + def __init__( + self, + *args, + harvest_documents: typing.Optional[bool] = True, + harvest_datasets: typing.Optional[bool] = True, + copy_datasets: typing.Optional[bool] = False, + copy_documents: typing.Optional[bool] = False, + resource_title_filter: typing.Optional[str] = None, + start_date_filter: typing.Optional[str] = None, + end_date_filter: typing.Optional[str] = None, + keywords_filter: typing.Optional[typing.List[str]] = None, + categories_filter: typing.Optional[typing.List[str]] = None, + **kwargs + ): + """A harvester for remote GeoNode instances.""" + super().__init__(*args, **kwargs) + self.remote_url = self.remote_url.rstrip("/") + self.http_session = requests.Session() + self.harvest_documents = bool(harvest_documents) + self.harvest_datasets = bool(harvest_datasets) + self.copy_datasets = bool(copy_datasets) + self.copy_documents = bool(copy_documents) + self.resource_title_filter = resource_title_filter + self.start_date_filter = start_date_filter + self.end_date_filter = end_date_filter + self.keywords_filter = keywords_filter + self.categories_filter = categories_filter + + @property + def base_api_url(self): + return f"{self.remote_url}/api/v2" + + @property + def allows_copying_resources(self) -> bool: + return True + + @classmethod + def from_django_record(cls, record: models.Harvester): + return _from_django_record(cls, record) + + @classmethod + def get_extra_config_schema(cls) -> typing.Dict: + return _get_extra_config_schema() + + def get_num_available_resources(self) -> int: + url = f"{self.base_api_url}/resources/" + response = self.http_session.get(url, params=self._get_resource_list_params()) + result = 0 + if response.status_code == requests.codes.ok: + try: + result = response.json().get("total", 0) + except json.JSONDecodeError as
exc: + logger.exception("Could not decode response as a JSON object") + raise base.HarvestingException(str(exc)) + else: + logger.error(f"Got back invalid response from {url!r}: {response.status_code}") + return result + + def list_resources( + self, + offset: typing.Optional[int] = 0 + ) -> typing.List[base.BriefRemoteResource]: + url = f"{self.base_api_url}/resources/" + response = self.http_session.get(url, params=self._get_resource_list_params(offset)) + result = [] + if response.status_code == requests.codes.ok: + try: + payload = response.json() + except json.JSONDecodeError as exc: + logger.exception("Could not decode response as a JSON object") + raise base.HarvestingException(str(exc)) + else: + for raw_resource in payload.get("resources", []): + try: + brief_resource = base.BriefRemoteResource( + unique_identifier=raw_resource["pk"], + title=raw_resource["title"], + abstract=raw_resource["abstract"], + resource_type=raw_resource["resource_type"], + ) + result.append(brief_resource) + except KeyError as exc: + logger.exception(f"Could not decode resource: {raw_resource!r}") + raise base.HarvestingException(str(exc)) + else: + logger.error(f"Got back invalid response from {url!r}: {response.status_code}") + return result + + def check_availability(self, timeout_seconds: typing.Optional[int] = 5) -> bool: + return _check_availability( + self.http_session, f"{self.base_api_url}/datasets", "datasets", timeout_seconds) + + def get_geonode_resource_type(self, remote_resource_type: str) -> typing.Type[typing.Union[Dataset, Document]]: + return { + GeoNodeResourceTypeCurrent.DATASET.value: Dataset, + GeoNodeResourceTypeCurrent.DOCUMENT.value: Document + }[remote_resource_type] + + def get_resource( + self, + harvestable_resource: models.HarvestableResource, + ) -> typing.Optional[base.HarvestedResourceInfo]: + url_fragment = { + GeoNodeResourceTypeCurrent.DATASET.value: "/datasets/", + GeoNodeResourceTypeCurrent.DOCUMENT.value: "/documents/" + }[harvestable_resource.remote_resource_type] + url = f"{self.base_api_url}{url_fragment}{harvestable_resource.unique_identifier}/" + response = self.http_session.get(url) + result = None + if response.status_code == requests.codes.ok: + try: + response_payload = response.json() + except json.JSONDecodeError: + logger.exception("Could not decode response payload as valid JSON") + else: + resource_descriptor = self._get_resource_descriptor( + response_payload, harvestable_resource.remote_resource_type) + result = base.HarvestedResourceInfo( + resource_descriptor=resource_descriptor, + additional_information=None + ) + else: + logger.error( + f"Could not retrieve remote resource with unique " + f"identifier {harvestable_resource.unique_identifier!r}" + ) + return result + + def should_copy_resource( + self, + harvestable_resource: models.HarvestableResource, + ) -> bool: + return { + GeoNodeResourceTypeCurrent.DATASET.value: self.copy_datasets, + GeoNodeResourceTypeCurrent.DOCUMENT.value: self.copy_documents, + }.get(harvestable_resource.remote_resource_type, False) + + def get_geonode_resource_defaults( + self, + harvested_info: base.HarvestedResourceInfo, + harvestable_resource: models.HarvestableResource, + ) -> typing.Dict: + defaults = super().get_geonode_resource_defaults( + harvested_info, harvestable_resource) + defaults.update(harvested_info.resource_descriptor.additional_parameters) + local_resource_type = self.get_geonode_resource_type(harvestable_resource.remote_resource_type) + to_copy = self.should_copy_resource(harvestable_resource) + if 
local_resource_type == Document and not to_copy: + # since we are not copying the document, we need to provide suitable remote URLs + defaults.update({ + "doc_url": harvested_info.resource_descriptor.distribution.embed_url, + "thumbnail_url": harvested_info.resource_descriptor.distribution.thumbnail_url, + }) + elif local_resource_type == Dataset: + defaults.update({ + "name": harvested_info.resource_descriptor.identification.name + }) + if not to_copy: + # since we are not copying the dataset, we need to provide suitable SRID and remote URL + try: + srid = harvested_info.resource_descriptor.reference_systems[0] + except AttributeError: + srid = None + defaults.update({ + "alternate": defaults["alternate"], + "workspace": defaults["workspace"], + "ows_url": harvested_info.resource_descriptor.distribution.wms_url, + "thumbnail_url": harvested_info.resource_descriptor.distribution.thumbnail_url, + "srid": srid, + }) + return defaults + + def _get_contact_descriptor(self, role, contact_details: typing.Dict): + return resourcedescriptor.RecordDescriptionContact( + role=role, + name=self._get_related_name(contact_details) or contact_details["username"] + ) + + def _get_related_name(self, contact_details: typing.Dict): + return " ".join(( + contact_details.get("first_name", ""), + contact_details.get("last_name", "") + )).strip() + + def _get_document_link_info(self, resource: typing.Dict): + native_format = resource["extension"] + download_url = resource["href"] + return native_format, download_url + + def _get_dataset_link_info(self, resource: typing.Dict, spatial_extent: geos.Polygon): + wms_url = None + wfs_url = None + wcs_url = None + download_url = None + native_format = None + for link_info in resource.get("links", []): + type_ = link_info["link_type"] + if type_ == "OGC:WMS": + wms_url = link_info["url"] + elif type_ == "OGC:WFS": + wfs_url = link_info["url"] + native_format = "shapefile" + query_params = { + "service": "WFS", + "version": "1.0.0", + "request": "GetFeature", + "typename": resource["name"], + "outputformat": "SHAPE-ZIP", + "srs": resource["srid"], + "format_options": "charset:UTF-8", + } + download_url = f"{wfs_url}?{urllib.parse.urlencode(query_params)}" + elif type_ == "OGC:WCS": + wcs_url = link_info["url"] + native_format = "geotiff" + coords = spatial_extent.coords[0] + min_x = min([i[0] for i in coords]) + max_x = max([i[0] for i in coords]) + min_y = min([i[1] for i in coords]) + max_y = max([i[1] for i in coords]) + coverage_id = resource["alternate"].replace(":", "__") + query_params = { + "service": "WCS", + "version": "2.0.1", + "request": "GetCoverage", + "srs": resource["srid"], + "format": "image/tiff", + "coverageid": coverage_id, + "bbox": f"{min_x},{min_y},{max_x},{max_y}" + } + download_url = f"{wcs_url}?{urllib.parse.urlencode(query_params)}" + return native_format, download_url, wms_url, wfs_url, wcs_url + + def _get_resource_link_info( + self, + resource: typing.Dict, remote_resource_type: str, + spatial_extent: geos.Polygon, + ) -> typing.Tuple[ + str, + str, + str, + str, + typing.Optional[str], + typing.Optional[str], + typing.Optional[str], + ]: + embed_url = resource["embed_url"] + thumbnail_url = resource["thumbnail_url"] + if remote_resource_type == GeoNodeResourceTypeCurrent.DATASET.value: + native_format, download_url, wms_url, wfs_url, wcs_url = self._get_dataset_link_info( + resource, spatial_extent) + else: + wms_url = None + wfs_url = None + wcs_url = None + native_format, download_url = self._get_document_link_info(resource) + return 
native_format, download_url, embed_url, thumbnail_url, wms_url, wfs_url, wcs_url + + def _get_resource_descriptor( + self, + raw_resource: typing.Dict, + remote_resource_type: str, + ) -> resourcedescriptor.RecordDescription: + resource = raw_resource[remote_resource_type] + resource_date = dateutil.parser.isoparse(resource["date"]) + resource_datestamp = dateutil.parser.isoparse(resource["last_updated"]) + try: # these are sometimes returned as None + temporal_extent_start = dateutil.parser.isoparse(resource["temporal_extent_start"]) + temporal_extent_end = dateutil.parser.isoparse(resource["temporal_extent_end"]) + except TypeError: + temporal_extent_start = None + temporal_extent_end = None + spatial_extent = geos.GEOSGeometry(json.dumps(resource.get("bbox_polygon"))) + link_info = self._get_resource_link_info(resource, remote_resource_type, spatial_extent) + native_format, download_url, embed_url, thumbnail_url, wms_url, wfs_url, wcs_url = link_info + descriptor = resourcedescriptor.RecordDescription( + # these work for both datasets and documents + uuid=resource["uuid"], + language=resource["language"], + point_of_contact=self._get_contact_descriptor("pointOfContact", resource["poc"]), + author=self._get_contact_descriptor("author", resource["metadata_author"]), + date_stamp=resource_datestamp, + reference_systems=[resource["srid"]], + data_quality=resource.get("raw_data_quality_statement"), + character_set=resource.get("charset", "UTF-8"), + identification=resourcedescriptor.RecordIdentification( + name=resource["name"], + title=resource["title"], + date=resource_date, + date_type=resource["date_type"], + originator=self._get_contact_descriptor("originator", resource["owner"]), + place_keywords=[i.get("code") for i in resource.get("regions", [])], + other_keywords=[i.get("slug") for i in resource.get("keywords", [])], + license=(resource.get("license") or {}).get("identifier"), + abstract=resource.get("raw_abstract", ""), + purpose=resource.get("raw_purpose", ""), + native_format=native_format, + other_constraints=resource.get("raw_constraints_other"), + topic_category=(resource.get("category") or {}).get("identifier"), + supplemental_information=resource.get("raw_supplemental_information"), + spatial_extent=spatial_extent, + temporal_extent=(temporal_extent_start, temporal_extent_end) if temporal_extent_start else None + ), + distribution=resourcedescriptor.RecordDistribution( + link_url=resource["link"], + wms_url=wms_url, + wfs_url=wfs_url, + wcs_url=wcs_url, + thumbnail_url=thumbnail_url, + download_url=download_url, + embed_url=embed_url, + ), + additional_parameters={ + "subtype": resource["subtype"], + "resource_type": remote_resource_type, + } + ) + if remote_resource_type == GeoNodeResourceTypeCurrent.DOCUMENT.value: + descriptor.additional_parameters["extension"] = resource["extension"] + elif remote_resource_type == GeoNodeResourceTypeCurrent.DATASET.value: + descriptor.additional_parameters.update({ + "alternate": resource["alternate"], + "workspace": resource["workspace"], + }) + return descriptor + + def _get_resource_list_params( + self, offset: typing.Optional[int] = 0) -> typing.Dict: + current_page = math.floor((offset + self.page_size) / self.page_size) + result = { + "page_size": self.page_size, + "page": current_page, + } + resource_filter = [] + if self.harvest_datasets: + resource_filter.append(GeoNodeResourceTypeCurrent.DATASET.value) + if self.harvest_documents: + resource_filter.append(GeoNodeResourceTypeCurrent.DOCUMENT.value) + if len(resource_filter) > 0: 
+ result["filter{resource_type.in}"] = resource_filter + + if self.resource_title_filter is not None: + result["filter{title.icontains}"] = self.resource_title_filter + if self.start_date_filter is not None: + start_date = dateutil.parser.parse(self.start_date_filter) + result["filter{date.gte}"] = f"{start_date.astimezone(dt.timezone.utc).replace(microsecond=0).isoformat().split('+')[0]}Z" + if self.end_date_filter is not None: + end_date = dateutil.parser.parse(self.end_date_filter) + result["filter{date.lte}"] = f"{end_date.astimezone(dt.timezone.utc).replace(microsecond=0).isoformat().split('+')[0]}Z" + if self.keywords_filter is not None: + result["filter{keywords.slug.in}"] = self.keywords_filter + if self.categories_filter is not None: + result["filter{category.identifier.in}"] = self.categories_filter + return result + + class GeonodeLegacyHarvester(base.BaseHarvesterWorker): + """A harvester for older (v <= 3.2) GeoNode versions""" harvest_documents: bool harvest_datasets: bool @@ -77,6 +466,7 @@ class GeonodeLegacyHarvester(base.BaseHarvesterWorker): harvest_maps: bool = False copy_documents: bool + copy_datasets: bool resource_title_filter: typing.Optional[str] http_session: requests.Session page_size: int = 10 @@ -120,84 +510,11 @@ def allows_copying_resources(self) -> bool: @classmethod def from_django_record(cls, record: models.Harvester): - return cls( - record.remote_url, - record.id, - harvest_documents=record.harvester_type_specific_configuration.get( - "harvest_documents", True), - harvest_datasets=record.harvester_type_specific_configuration.get( - "harvest_datasets", True), - copy_datasets=record.harvester_type_specific_configuration.get( - "copy_datasets", False), - copy_documents=record.harvester_type_specific_configuration.get( - "copy_documents", False), - resource_title_filter=record.harvester_type_specific_configuration.get( - "resource_title_filter"), - start_date_filter=record.harvester_type_specific_configuration.get( - "start_date_filter"), - end_date_filter=record.harvester_type_specific_configuration.get( - "end_date_filter"), - keywords_filter=record.harvester_type_specific_configuration.get( - "keywords_filter"), - categories_filter=record.harvester_type_specific_configuration.get( - "categories_filter") - ) + return _from_django_record(cls, record) @classmethod def get_extra_config_schema(cls) -> typing.Dict: - return { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": ( - "https://geonode.org/harvesting/geonode-legacy-harvester.schema.json"), - "title": "GeoNode harvester config", - "description": ( - "A jsonschema for validating configuration option for GeoNode's " - "remote GeoNode harvester" - ), - "type": "object", - "properties": { - "harvest_documents": { - "type": "boolean", - "default": True - }, - "copy_documents": { - "type": "boolean", - "default": False - }, - "harvest_datasets": { - "type": "boolean", - "default": True - }, - "copy_datasets": { - "type": "boolean", - "default": False - }, - "resource_title_filter": { - "type": "string", - }, - "start_date_filter": { - "type": "string", - "format": "date-time" - }, - "end_date_filter": { - "type": "string", - "format": "date-time" - }, - "keywords_filter": { - "type": "array", - "items": { - "type": "string" - } - }, - "categories_filter": { - "type": "array", - "items": { - "type": "string" - } - }, - }, - "additionalProperties": False, - } + return _get_extra_config_schema() def get_num_available_resources(self) -> int: result = 0 @@ -251,22 +568,7 @@ def 
list_resources( def check_availability(self, timeout_seconds: typing.Optional[int] = 5) -> bool: """Check whether the remote GeoNode is online.""" - try: - response = self.http_session.get( - f"{self.base_api_url}/", timeout=timeout_seconds) - response.raise_for_status() - except (requests.HTTPError, requests.ConnectionError): - result = False - else: - try: - response_payload = response.json() - except json.JSONDecodeError: - logger.exception("Could not decode server response as valid JSON") - result = False - else: - layers_endpoint_present = response_payload.get("layers") is not None - result = layers_endpoint_present - return result + return _check_availability(self.http_session, f"{self.base_api_url}/", "layers", timeout_seconds) def get_geonode_resource_type(self, remote_resource_type: str) -> typing.Type[typing.Union[Dataset, Document, Map]]: """Return resource type class from resource type string.""" @@ -300,13 +602,15 @@ def get_resource( additional_information=None ) else: - logger.warning( - f"Could not retrieve remote resource {resource_unique_identifier!r}") + logger.error( + f"Could not retrieve remote resource with unique " + f"identifier {resource_unique_identifier!r}" + ) return result def should_copy_resource( self, - harvestable_resource: "HarvestableResource", # noqa + harvestable_resource: models.HarvestableResource, ) -> bool: return { GeoNodeResourceType.DOCUMENT.value: self.copy_documents, @@ -326,23 +630,25 @@ def get_geonode_resource_defaults( if local_resource_type == Document and not to_copy: # since we are not copying the document, we need to provide suitable remote URLs defaults.update({ - "doc_url": harvested_info.resource_descriptor.distribution.original_format_url, + "doc_url": harvested_info.resource_descriptor.distribution.download_url, "thumbnail_url": harvested_info.resource_descriptor.distribution.thumbnail_url, }) - elif local_resource_type == Dataset and not to_copy: - # since we are not copying the dataset, we need to provide suitable SRID and remote URL - try: - srid = harvested_info.resource_descriptor.reference_systems[0] - except AttributeError: - srid = None + elif local_resource_type == Dataset: defaults.update({ - "alternate": defaults["name"], - "name": defaults["name"].rpartition(":")[-1], - "workspace": defaults["name"].rpartition(":")[0], - "ows_url": harvested_info.resource_descriptor.distribution.wms_url, - "thumbnail_url": harvested_info.resource_descriptor.distribution.thumbnail_url, - "srid": srid, + "name": harvested_info.resource_descriptor.identification.name, }) + if not to_copy: + # since we are not copying the dataset, we need to provide suitable SRID and remote URL + try: + srid = harvested_info.resource_descriptor.reference_systems[0] + except AttributeError: + srid = None + defaults.update({ + "name": defaults["name"].rpartition(":")[-1], + "ows_url": harvested_info.resource_descriptor.distribution.wms_url, + "thumbnail_url": harvested_info.resource_descriptor.distribution.thumbnail_url, + "srid": srid, + }) return defaults def _get_num_available_resources_by_type( @@ -395,11 +701,11 @@ def _list_resources_by_type( ) response.raise_for_status() result = [] - for resource in response.json().get('objects', []): + for resource in response.json().get("objects", []): brief_resource = base.BriefRemoteResource( unique_identifier=self._extract_unique_identifier(resource), - title=resource['title'], - abstract=resource['abstract'], + title=resource["title"], + abstract=resource["abstract"], resource_type=resource_type.value, ) 
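# NOTE: the pre-3.3 API is tastypie-style: results arrive under the "objects" key seen above, and its filters use Django double-underscore lookups (see _get_resource_list_params further below) instead of the v2 "filter{...}" syntax.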
result.append(brief_resource) @@ -464,16 +770,14 @@ def _get_resource_list_params( result["title__icontains"] = self.resource_title_filter if self.start_date_filter is not None: start_date = dateutil.parser.parse(self.start_date_filter) - result["date__gte"] = f"{start_date.astimezone(datetime.timezone.utc).replace(microsecond=0).isoformat().split('+')[0]}Z" + result["date__gte"] = f"{start_date.astimezone(dt.timezone.utc).replace(microsecond=0).isoformat().split('+')[0]}Z" if self.end_date_filter is not None: end_date = dateutil.parser.parse(self.end_date_filter) - result["date__lte"] = f"{end_date.astimezone(datetime.timezone.utc).replace(microsecond=0).isoformat().split('+')[0]}Z" + result["date__lte"] = f"{end_date.astimezone(dt.timezone.utc).replace(microsecond=0).isoformat().split('+')[0]}Z" if self.keywords_filter is not None: result["keywords__slug__in"] = ','.join(self.keywords_filter) if self.categories_filter is not None: result["category__identifier__in"] = ','.join(self.categories_filter) - if self.categories_filter is not None: - result["category__identifier__in"] = ','.join(self.categories_filter) return result def _get_total_records( @@ -513,9 +817,6 @@ def _get_resource_descriptor( character_set=csw_record.xpath( "gmd:characterSet/gmd:MD_CharacterSetCode/text()", namespaces=csw_record.nsmap)[0], - hierarchy_level=csw_record.xpath( - "gmd:hierarchyLevel/gmd:MD_ScopeCode/text()", - namespaces=csw_record.nsmap)[0], point_of_contact=get_contact_descriptor( csw_record.xpath( "gmd:contact[.//gmd:role//@codeListValue='pointOfContact']", @@ -561,7 +862,9 @@ def _get_dataset_additional_parameters( result = { "name": descriptor.identification.name, "charset": descriptor.character_set, - "resource_type": "dataset" + "resource_type": "dataset", + "alternate": api_record.get("alternate", descriptor.identification.name), + "workspace": api_record.get("workspace") } if descriptor.identification.native_format.lower() == RemoteDatasetType.VECTOR.value: result["subtype"] = GeoNodeDatasetType.VECTOR.value @@ -606,8 +909,6 @@ def get_distribution_info( wms = None wfs = None wcs = None - legend = None - geojson = None original = None original_format_values = ( "original dataset format", @@ -625,10 +926,6 @@ def get_distribution_info( wfs = linkage elif "ogc:wcs" in protocol: wcs = linkage - elif "legend" in description.lower(): - legend = linkage - elif "geojson" in description.lower(): - geojson = linkage else: for original_value in original_format_values: if original_value in description.lower(): @@ -660,7 +957,7 @@ def get_distribution_info( "request": "GetCoverage", "srs": crs, "format": "image/tiff", - "coverageid": identification_descriptor.name.replace(":", "__"), + "coverageid": api_record["alternate"].replace(":", "__"), "bbox": f"{min_x},{min_y},{max_x},{max_y}" } original = f"{wcs}?{urllib.parse.urlencode(query_params)}" @@ -676,9 +973,8 @@ def get_distribution_info( wcs_url=wcs, thumbnail_url=self._retrieve_thumbnail_url( api_record, harvestable_resource), - legend_url=legend, - geojson_url=geojson, - original_format_url=original, + download_url=original, + embed_url=original, ) def _retrieve_thumbnail_url( @@ -728,6 +1024,118 @@ def _retrieve_thumbnail_url( return thumbnail +class GeonodeUnifiedHarvesterWorker(base.BaseHarvesterWorker): + """A harvester worker that is able to retrieve details from most GeoNode deployments. + + This harvester type relies on the `GeonodeCurrentHarvester` and `GeonodeLegacyHarvester` for most + operations. 
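Both concrete workers accept the same configuration options, so the same `models.Harvester` record can drive either of them.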
It simply determines which concrete harvester to use based on the remote's response + for the availability check and then uses it. + + """ + + _concrete_harvester_worker: typing.Optional[ + typing.Union[GeonodeCurrentHarvester, GeonodeLegacyHarvester]] + + def __init__( + self, + *args, + harvest_documents: typing.Optional[bool] = True, + harvest_datasets: typing.Optional[bool] = True, + copy_datasets: typing.Optional[bool] = False, + copy_documents: typing.Optional[bool] = False, + resource_title_filter: typing.Optional[str] = None, + start_date_filter: typing.Optional[str] = None, + end_date_filter: typing.Optional[str] = None, + keywords_filter: typing.Optional[typing.List[str]] = None, + categories_filter: typing.Optional[typing.List[str]] = None, + **kwargs + ): + """A harvester for remote GeoNode instances.""" + self._concrete_harvester_worker = None + super().__init__(*args, **kwargs) + self.remote_url = self.remote_url.rstrip("/") + self.http_session = requests.Session() + self.harvest_documents = bool(harvest_documents) + self.harvest_datasets = bool(harvest_datasets) + self.copy_datasets = bool(copy_datasets) + self.copy_documents = bool(copy_documents) + self.resource_title_filter = resource_title_filter + self.start_date_filter = start_date_filter + self.end_date_filter = end_date_filter + self.keywords_filter = keywords_filter + self.categories_filter = categories_filter + + @property + def concrete_worker(self) -> typing.Union[GeonodeCurrentHarvester, GeonodeLegacyHarvester]: + if self._concrete_harvester_worker is None: + self._concrete_harvester_worker = self._get_concrete_worker() + return self._concrete_harvester_worker + + @property + def allows_copying_resources(self) -> bool: + return self.concrete_worker.allows_copying_resources + + @classmethod + def from_django_record(cls, record: models.Harvester): + return _from_django_record(cls, record) + + @classmethod + def get_extra_config_schema(cls) -> typing.Dict: + return _get_extra_config_schema() + + def get_num_available_resources(self) -> int: + return self.concrete_worker.get_num_available_resources() + + def list_resources( + self, + offset: typing.Optional[int] = 0 + ) -> typing.List[base.BriefRemoteResource]: + return self.concrete_worker.list_resources(offset) + + def check_availability(self, timeout_seconds: typing.Optional[int] = 5) -> bool: + return self.concrete_worker.check_availability(timeout_seconds) + + def get_geonode_resource_type(self, remote_resource_type: str) -> typing.Type[typing.Union[Dataset, Document]]: + return self.concrete_worker.get_geonode_resource_type(remote_resource_type) + + def get_resource( + self, + harvestable_resource: models.HarvestableResource, + ) -> typing.Optional[base.HarvestedResourceInfo]: + return self.concrete_worker.get_resource(harvestable_resource) + + def should_copy_resource( + self, + harvestable_resource: models.HarvestableResource, + ) -> bool: + return self.concrete_worker.should_copy_resource(harvestable_resource) + + def get_geonode_resource_defaults( + self, + harvested_info: base.HarvestedResourceInfo, + harvestable_resource: models.HarvestableResource, + ) -> typing.Dict: + return self.concrete_worker.get_geonode_resource_defaults(harvested_info, harvestable_resource) + + def _get_concrete_worker(self) -> typing.Union[GeonodeCurrentHarvester, GeonodeLegacyHarvester]: + # first try to initialize the GeonodeCurrentHarvester. 
If not available, fall back to the legacy one + kwargs = { + "remote_url": self.remote_url, + "harvester_id": self.harvester_id, + "harvest_documents": self.harvest_documents, + "harvest_datasets": self.harvest_datasets, + "copy_documents": self.copy_documents, + "copy_datasets": self.copy_datasets, + "resource_title_filter": self.resource_title_filter, + "start_date_filter": self.start_date_filter, + "end_date_filter": self.end_date_filter, + "keywords_filter": self.keywords_filter, + "categories_filter": self.categories_filter + } + current = GeonodeCurrentHarvester(**kwargs) + return current if current.check_availability() else GeonodeLegacyHarvester(**kwargs) + + def get_contact_descriptor(contact: etree.Element): return resourcedescriptor.RecordDescriptionContact( role=contact.xpath( @@ -811,8 +1219,6 @@ def get_identification_descriptor(csw_identification: etree.Element, api_record: csw_identification.xpath( ".//gmd:pointOfContact", namespaces=csw_identification.nsmap)[0] ), - graphic_overview_uri=get_xpath_value( - csw_identification, ".//gmd:graphicOverview//gmd:fileName"), native_format=_get_native_format(csw_identification, api_record), place_keywords=place_keywords, other_keywords=other_keywords, @@ -907,3 +1313,107 @@ def get_temporal_extent( def _get_optional_attribute_value( element: etree.Element, xpath: str) -> typing.Optional[str]: return element.xpath(f"{xpath}/text()", namespaces=element.nsmap)[0].strip() or None + + +def _get_extra_config_schema() -> typing.Dict: + return { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": ( + "https://geonode.org/harvesting/geonode-legacy-harvester.schema.json"), + "title": "GeoNode harvester config", + "description": ( + "A jsonschema for validating configuration option for GeoNode's " + "remote GeoNode harvester" + ), + "type": "object", + "properties": { + "harvest_documents": { + "type": "boolean", + "default": True + }, + "copy_documents": { + "type": "boolean", + "default": False + }, + "harvest_datasets": { + "type": "boolean", + "default": True + }, + "copy_datasets": { + "type": "boolean", + "default": False + }, + "resource_title_filter": { + "type": "string", + }, + "start_date_filter": { + "type": "string", + "format": "date-time" + }, + "end_date_filter": { + "type": "string", + "format": "date-time" + }, + "keywords_filter": { + "type": "array", + "items": { + "type": "string" + } + }, + "categories_filter": { + "type": "array", + "items": { + "type": "string" + } + }, + }, + "additionalProperties": False, + } + + +def _from_django_record(target_class: typing.Type, record: models.Harvester): + return target_class( + record.remote_url, + record.id, + harvest_documents=record.harvester_type_specific_configuration.get( + "harvest_documents", True), + harvest_datasets=record.harvester_type_specific_configuration.get( + "harvest_datasets", True), + copy_datasets=record.harvester_type_specific_configuration.get( + "copy_datasets", False), + copy_documents=record.harvester_type_specific_configuration.get( + "copy_documents", False), + resource_title_filter=record.harvester_type_specific_configuration.get( + "resource_title_filter"), + start_date_filter=record.harvester_type_specific_configuration.get( + "start_date_filter"), + end_date_filter=record.harvester_type_specific_configuration.get( + "end_date_filter"), + keywords_filter=record.harvester_type_specific_configuration.get( + "keywords_filter"), + categories_filter=record.harvester_type_specific_configuration.get( + "categories_filter") + ) + + +def 
_check_availability( + http_session, + url: str, + payload_key_to_check: str, + timeout_seconds: typing.Optional[int] = 5, +) -> bool: + try: + response = http_session.get(url, timeout=timeout_seconds) + response.raise_for_status() + except (requests.HTTPError, requests.ConnectionError): + result = False + else: + try: + response_payload = response.json() + except json.JSONDecodeError: + logger.exception("Could not decode server response as valid JSON") + result = False + else: + key_present = response_payload.get(payload_key_to_check) is not None + result = key_present + return result diff --git a/geonode/harvesting/harvesters/wms.py b/geonode/harvesting/harvesters/wms.py index 164e44d94b5..0ec38f1d364 100644 --- a/geonode/harvesting/harvesters/wms.py +++ b/geonode/harvesting/harvesters/wms.py @@ -20,6 +20,7 @@ import typing import uuid from datetime import datetime +from functools import lru_cache from urllib.parse import ( unquote, urlparse, @@ -36,22 +37,26 @@ from django.conf import settings from django.contrib.gis import geos +from django.template.defaultfilters import slugify from geonode.layers.models import Dataset from geonode.base.models import ResourceBase +from geonode.layers.enumerations import GXP_PTYPES from geonode.thumbs.thumbnails import create_thumbnail -from . import base -from ..models import Harvester, HarvestableResource +from .. import models from ..utils import ( XML_PARSER, get_xpath_value, ) from .. import resourcedescriptor +from . import base + logger = logging.getLogger(__name__) +@lru_cache() def WebMapService(url, version='1.3.0', xml=None, @@ -129,7 +134,7 @@ def allows_copying_resources(self) -> bool: return False @classmethod - def from_django_record(cls, record: Harvester): + def from_django_record(cls, record: models.Harvester): return cls( record.remote_url, record.id, @@ -239,10 +244,12 @@ def list_resources( resources = [] data = self._get_data() for layer in data['layers']: + name = layer['name'] + title = layer.get('title') or name.rpartition(':')[-1] resources.append( base.BriefRemoteResource( - unique_identifier=layer['name'], - title=layer['title'], + unique_identifier=name, + title=title, abstract=layer['abstract'], resource_type='layers', ) @@ -272,9 +279,19 @@ def get_geonode_resource_type(self, remote_resource_type: str) -> ResourceBase: # So whatever the remote_resource_type is, this always returns Dataset. return Dataset + def get_geonode_resource_defaults( + self, + harvested_info: base.HarvestedResourceInfo, + harvestable_resource: models.HarvestableResource, # noqa + ) -> typing.Dict: + defaults = super().get_geonode_resource_defaults(harvested_info, harvestable_resource) + defaults["name"] = harvested_info.resource_descriptor.identification.name + defaults.update(harvested_info.resource_descriptor.additional_parameters) + return defaults + def get_resource( self, - harvestable_resource: HarvestableResource, + harvestable_resource: models.HarvestableResource, ) -> typing.Optional[base.HarvestedResourceInfo]: resource_unique_identifier = harvestable_resource.unique_identifier data = self._get_data() @@ -292,6 +309,7 @@ def get_resource( # WMS does not provide the date of the resource. # Use current time for the date stamp and resource time.
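# Since capabilities documents carry no per-layer timestamps, this stamp effectively records "when harvested", not when the remote layer last changed.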
time = datetime.now() + service_name = slugify(self.remote_url)[:255] contact = resourcedescriptor.RecordDescriptionContact(**data['contact']) result = base.HarvestedResourceInfo( resource_descriptor=resourcedescriptor.RecordDescription( @@ -305,7 +323,6 @@ def get_resource( date=time, date_type='', originator=contact, - graphic_overview_uri='', place_keywords=[], other_keywords=relevant_layer['keywords'], license=[], @@ -313,10 +330,16 @@ def get_resource( spatial_extent=relevant_layer['spatial_extent'] ), distribution=resourcedescriptor.RecordDistribution( - legend_url=relevant_layer['legend_url'], - wms_url=relevant_layer['wms_url'] + wms_url=relevant_layer['wms_url'], ), reference_systems=[relevant_layer['crs']], + additional_parameters={ + 'alternate': relevant_layer["name"], + 'store': service_name, + 'workspace': 'remoteWorkspace', + 'ows_url': relevant_layer['wms_url'], + 'ptype': GXP_PTYPES["WMS"] + } ), additional_information=None ) @@ -510,7 +533,7 @@ def finalize_resource_update( self, geonode_resource: ResourceBase, harvested_info: base.HarvestedResourceInfo, - harvestable_resource: HarvestableResource + harvestable_resource: models.HarvestableResource ) -> ResourceBase: """Create a thumbnail with a WMS request.""" if not geonode_resource.srid: diff --git a/geonode/harvesting/migrations/0046_alter_harvester_harvester_type.py b/geonode/harvesting/migrations/0046_alter_harvester_harvester_type.py new file mode 100644 index 00000000000..61abf01fe21 --- /dev/null +++ b/geonode/harvesting/migrations/0046_alter_harvester_harvester_type.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.7 on 2021-10-08 22:02 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('harvesting', '0045_remove_harvester_update_frequency'), + ] + + operations = [ + migrations.AlterField( + model_name='harvester', + name='harvester_type', + field=models.CharField(choices=[('geonode.harvesting.harvesters.geonodeharvester.GeonodeUnifiedHarvesterWorker', 'geonode.harvesting.harvesters.geonodeharvester.GeonodeUnifiedHarvesterWorker'), ('geonode.harvesting.harvesters.geonodeharvester.GeonodeLegacyHarvester', 'geonode.harvesting.harvesters.geonodeharvester.GeonodeLegacyHarvester'), ('geonode.harvesting.harvesters.geonodeharvester.GeonodeCurrentHarvester', 'geonode.harvesting.harvesters.geonodeharvester.GeonodeCurrentHarvester'), ('geonode.harvesting.harvesters.wms.OgcWmsHarvester', 'geonode.harvesting.harvesters.wms.OgcWmsHarvester')], default='geonode.harvesting.harvesters.geonodeharvester.GeonodeUnifiedHarvesterWorker', help_text='Harvester class used to perform harvesting sessions. 
New harvester types can be added by an admin by changing the main GeoNode `settings.py` file', max_length=255), + ), + ] diff --git a/geonode/harvesting/migrations/0047_convert_geonode_harvesters_to_unified.py b/geonode/harvesting/migrations/0047_convert_geonode_harvesters_to_unified.py new file mode 100644 index 00000000000..c15a94dd437 --- /dev/null +++ b/geonode/harvesting/migrations/0047_convert_geonode_harvesters_to_unified.py @@ -0,0 +1,37 @@ +# Generated by Django 3.2.7 on 2021-10-11 11:51 + +from django.db import migrations + +_INDIVIDUAL_HARVESTER_PATHS = [ + "geonode.harvesting.harvesters.geonodeharvester.GeonodeLegacyHarvester", + "geonode.harvesting.harvesters.geonodeharvester.GeonodeCurrentHarvester", +] +_UNIFIED_HARVESTER_PATH = "geonode.harvesting.harvesters.geonodeharvester.GeonodeUnifiedHarvesterWorker" + + +def convert_geonode_harvesters_to_unified(apps, schema_editor): + """Convert GeoNode harvesters to use the unified harvester""" + harvester_model = apps.get_model('harvesting', 'Harvester') + for harvester in harvester_model.objects.all(): + if harvester.harvester_type in _INDIVIDUAL_HARVESTER_PATHS: + harvester.harvester_type = _UNIFIED_HARVESTER_PATH + harvester.save() + + +def reverse_harvester_conversion(apps, schema_editor): + harvester_model = apps.get_model('harvesting', 'Harvester') + for harvester in harvester_model.objects.all(): + if harvester.harvester_type == _UNIFIED_HARVESTER_PATH: + harvester.harvester_type = _INDIVIDUAL_HARVESTER_PATHS[0] + harvester.save() + + +class Migration(migrations.Migration): + + dependencies = [ + ('harvesting', '0046_alter_harvester_harvester_type'), + ] + + operations = [ + migrations.RunPython(convert_geonode_harvesters_to_unified, reverse_code=reverse_harvester_conversion), + ] diff --git a/geonode/harvesting/migrations/0048_alter_harvester_harvester_type.py b/geonode/harvesting/migrations/0048_alter_harvester_harvester_type.py new file mode 100644 index 00000000000..f891cffaded --- /dev/null +++ b/geonode/harvesting/migrations/0048_alter_harvester_harvester_type.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.7 on 2021-10-11 12:11 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('harvesting', '0047_convert_geonode_harvesters_to_unified'), + ] + + operations = [ + migrations.AlterField( + model_name='harvester', + name='harvester_type', + field=models.CharField(choices=[('geonode.harvesting.harvesters.geonodeharvester.GeonodeUnifiedHarvesterWorker', 'geonode.harvesting.harvesters.geonodeharvester.GeonodeUnifiedHarvesterWorker'), ('geonode.harvesting.harvesters.wms.OgcWmsHarvester', 'geonode.harvesting.harvesters.wms.OgcWmsHarvester')], default='geonode.harvesting.harvesters.geonodeharvester.GeonodeUnifiedHarvesterWorker', help_text='Harvester class used to perform harvesting sessions. 
New harvester types can be added by an admin by changing the main GeoNode `settings.py` file', max_length=255), + ), + ] diff --git a/geonode/harvesting/migrations/0049_alter_harvester_harvester_type.py b/geonode/harvesting/migrations/0049_alter_harvester_harvester_type.py new file mode 100644 index 00000000000..bab927da32a --- /dev/null +++ b/geonode/harvesting/migrations/0049_alter_harvester_harvester_type.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.4 on 2021-10-12 13:43 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('harvesting', '0048_alter_harvester_harvester_type'), + ] + + operations = [ + migrations.AlterField( + model_name='harvester', + name='harvester_type', + field=models.CharField(choices=[('geonode.harvesting.harvesters.geonodeharvester.GeonodeUnifiedHarvesterWorker', 'geonode.harvesting.harvesters.geonodeharvester.GeonodeUnifiedHarvesterWorker'), ('geonode.harvesting.harvesters.wms.OgcWmsHarvester', 'geonode.harvesting.harvesters.wms.OgcWmsHarvester'), ('geonode.harvesting.harvesters.arcgis.ArcgisHarvesterWorker', 'geonode.harvesting.harvesters.arcgis.ArcgisHarvesterWorker')], default='geonode.harvesting.harvesters.geonodeharvester.GeonodeUnifiedHarvesterWorker', help_text='Harvester class used to perform harvesting sessions. New harvester types can be added by an admin by changing the main GeoNode `settings.py` file', max_length=255), + ), + ] diff --git a/geonode/harvesting/resourcedescriptor.py b/geonode/harvesting/resourcedescriptor.py index fed58640d9d..5c67d760f1d 100644 --- a/geonode/harvesting/resourcedescriptor.py +++ b/geonode/harvesting/resourcedescriptor.py @@ -45,13 +45,12 @@ class RecordDescriptionContact: class RecordIdentification: name: str title: str - date: dt.datetime - date_type: str - originator: RecordDescriptionContact - graphic_overview_uri: str - place_keywords: typing.List[str] - other_keywords: typing.Tuple - license: typing.List[str] + date: typing.Optional[dt.datetime] = None + date_type: typing.Optional[str] = None + originator: typing.Optional[RecordDescriptionContact] = None + place_keywords: typing.Optional[typing.List[str]] = None + other_keywords: typing.Optional[typing.Iterable] = None + license: typing.Optional[typing.List[str]] = None abstract: typing.Optional[str] = "" purpose: typing.Optional[str] = "" status: typing.Optional[str] = "" @@ -70,9 +69,8 @@ class RecordDistribution: wfs_url: typing.Optional[str] = None wcs_url: typing.Optional[str] = None thumbnail_url: typing.Optional[str] = None - legend_url: typing.Optional[str] = None - geojson_url: typing.Optional[str] = None - original_format_url: typing.Optional[str] = None + download_url: typing.Optional[str] = None + embed_url: typing.Optional[str] = None @dataclasses.dataclass() @@ -87,12 +85,11 @@ class MapDescriptorParameters: @dataclasses.dataclass() class RecordDescription: uuid: uuid.UUID - point_of_contact: RecordDescriptionContact - author: RecordDescriptionContact - date_stamp: dt.datetime identification: RecordIdentification distribution: RecordDistribution - hierarchy_level: typing.Optional[str] = "dataset" + point_of_contact: typing.Optional[RecordDescriptionContact] = None + author: typing.Optional[RecordDescriptionContact] = None + date_stamp: typing.Optional[dt.datetime] = None reference_systems: typing.Optional[typing.List[str]] = None data_quality: typing.Optional[str] = None additional_parameters: typing.Optional[typing.Dict] = dataclasses.field( diff --git a/geonode/harvesting/tasks.py 
b/geonode/harvesting/tasks.py index e53994c7f23..17da424673b 100644 --- a/geonode/harvesting/tasks.py +++ b/geonode/harvesting/tasks.py @@ -225,7 +225,6 @@ def _harvest_resource( result = True details = "" except (RuntimeError, ValidationError) as exc: - logger.exception(exc) logger.error(msg="Unable to update geonode resource") result = False details = str(exc) @@ -427,7 +426,6 @@ def _update_harvestable_resources_batch( harvester=harvester, unique_identifier=remote_resource.unique_identifier, title=remote_resource.title, - abstract=remote_resource.abstract or "", defaults={ "should_be_harvested": harvester.harvest_new_resources_by_default, "remote_resource_type": remote_resource.resource_type, diff --git a/geonode/harvesting/tests/factories.py b/geonode/harvesting/tests/factories.py index e013a21da9f..76d9672d6b1 100644 --- a/geonode/harvesting/tests/factories.py +++ b/geonode/harvesting/tests/factories.py @@ -33,7 +33,6 @@ date=datetime.datetime.now(), date_type='type', originator=contact_example, - graphic_overview_uri='', place_keywords=['keyword'], other_keywords=('test',), license=['test'] diff --git a/geonode/harvesting/tests/harvesters/geonode.py b/geonode/harvesting/tests/harvesters/geonode.py index 499790c2ac9..43ff0817d74 100644 --- a/geonode/harvesting/tests/harvesters/geonode.py +++ b/geonode/harvesting/tests/harvesters/geonode.py @@ -200,4 +200,4 @@ def test_worker_from_django_record(self): self.assertFalse(worker.harvest_documents) self.assertTrue(worker.harvest_datasets) self.assertTrue(worker.harvest_maps) - self.assertEqual(worker.resource_title_filter, '') + self.assertEqual(worker.resource_name_filter, '') diff --git a/geonode/harvesting/tests/test_admin.py b/geonode/harvesting/tests/test_admin.py index 6768073c2b0..fd3975a4ad4 100644 --- a/geonode/harvesting/tests/test_admin.py +++ b/geonode/harvesting/tests/test_admin.py @@ -15,7 +15,7 @@ class HarvesterAdminTestCase(GeoNodeBaseTestSupport): - harvester_type = 'geonode.harvesting.harvesters.geonodeharvester.GeonodeLegacyHarvester' + harvester_type = 'geonode.harvesting.harvesters.geonodeharvester.GeonodeUnifiedHarvesterWorker' def setUp(self): self.factory = RequestFactory() diff --git a/geonode/harvesting/tests/test_api_serializers.py b/geonode/harvesting/tests/test_api_serializers.py index 7bfb5a405b7..c6b74b96cf4 100644 --- a/geonode/harvesting/tests/test_api_serializers.py +++ b/geonode/harvesting/tests/test_api_serializers.py @@ -21,7 +21,7 @@ class BriefHarvesterSerializerTestCase(GeoNodeBaseTestSupport): remote_url = 'test.com' name = 'This is geonode harvester' user = get_user_model().objects.get(username='AnonymousUser') - harvester_type = "geonode.harvesting.harvesters.geonodeharvester.GeonodeLegacyHarvester" + harvester_type = "geonode.harvesting.harvesters.geonodeharvester.GeonodeUnifiedHarvesterWorker" @classmethod def setUpTestData(cls): @@ -48,7 +48,7 @@ class HarvesterSerializerTestCase(GeoNodeBaseTestSupport): remote_url = 'test.com' name = 'This is geonode harvester' user = get_user_model().objects.get(username='AnonymousUser') - harvester_type = "geonode.harvesting.harvesters.geonodeharvester.GeonodeLegacyHarvester" + harvester_type = "geonode.harvesting.harvesters.geonodeharvester.GeonodeUnifiedHarvesterWorker" @classmethod def setUpTestData(cls): @@ -76,7 +76,7 @@ def test_validate_also_validates_worker_specific_config(self, mock_validate_conf "name": "phony", "remote_url": "http://fake.com", "user": 1, - "harvester_type": "geonode.harvesting.harvesters.geonodeharvester.GeonodeLegacyHarvester", + 
"harvester_type": "geonode.harvesting.harvesters.geonodeharvester.GeonodeUnifiedHarvesterWorker", "harvester_type_specific_configuration": {"something": "fake config"}, } @@ -125,7 +125,6 @@ def test_create(self): "remote_url": "http://fake.com", "user": 1, } - request = _REQUEST_FACTORY.post("/api/v2/harvesters/") request.user = self.user @@ -237,7 +236,7 @@ def setUpTestData(cls): remote_url = 'test.com' name = 'This is geonode harvester' user = get_user_model().objects.get(username='AnonymousUser') - harvester_type = "geonode.harvesting.harvesters.geonodeharvester.GeonodeLegacyHarvester" + harvester_type = "geonode.harvesting.harvesters.geonodeharvester.GeonodeUnifiedHarvesterWorker" cls.harvester = models.Harvester.objects.create( remote_url=remote_url, name=name, @@ -270,7 +269,7 @@ def setUpTestData(cls): remote_url='test.com', name='This is geonode harvester', default_owner=get_user_model().objects.get(username='AnonymousUser'), - harvester_type="geonode.harvesting.harvesters.geonodeharvester.GeonodeLegacyHarvester" + harvester_type="geonode.harvesting.harvesters.geonodeharvester.GeonodeUnifiedHarvesterWorker" ) cls.harvestable_resource = models.HarvestableResource.objects.create( unique_identifier=cls.unique_identifier, diff --git a/geonode/harvesting/tests/test_harvester_worker_arcgis.py b/geonode/harvesting/tests/test_harvester_worker_arcgis.py new file mode 100644 index 00000000000..4549ffa48a0 --- /dev/null +++ b/geonode/harvesting/tests/test_harvester_worker_arcgis.py @@ -0,0 +1,78 @@ +######################################################################### +# +# Copyright (C) 2021 OSGeo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+# +######################################################################### +from unittest import mock + +from geonode.harvesting.harvesters import arcgis +from geonode.tests.base import GeoNodeBaseSimpleTestSupport + + +class ArcgisModuleTestCase(GeoNodeBaseSimpleTestSupport): + + def test_parse_spatial_extent_with_latest_wkid(self): + raw_extent = { + "xmin": 10, + "ymin": 5, + "xmax": 20, + "ymax": 45, + "spatialReference": { + "wkid": 102100, + "latestWkid": 3857, + } + } + epsg_code, polygon = arcgis._parse_spatial_extent(raw_extent) + self.assertEqual(epsg_code, "EPSG:3857") + self.assertEqual(polygon.wkt, "POLYGON ((10 5, 10 45, 20 45, 20 5, 10 5))") + + def test_parse_spatial_extent_without_latest_wkid(self): + raw_extent = { + "xmin": 10, + "ymin": 5, + "xmax": 20, + "ymax": 45, + "spatialReference": { + "wkid": 102100, + } + } + epsg_code, polygon = arcgis._parse_spatial_extent(raw_extent) + self.assertEqual(epsg_code, "EPSG:102100") + self.assertEqual(polygon.wkt, "POLYGON ((10 5, 10 45, 20 45, 20 5, 10 5))") + + def test_parse_remote_url(self): + fixtures = [ + ("https://fake/rest/services/myservice/MapServer", "https://fake/rest/services", "myservice", "MapServer"), + ("https://fake/rest/services/myservice/MapServer/Query", "https://fake/rest/services", "myservice", "MapServer"), + ("https://fake/rest/services", "https://fake/rest/services", None, None), + ("https://fake/rest/services/myservice/ImageServer", "https://fake/rest/services", "myservice", "ImageServer"), + ] + for url, expected_cat_url, expected_service_name, expected_service_type in fixtures: + cat_url, service_name, service_type = arcgis.parse_remote_url(url) + self.assertEqual(cat_url, expected_cat_url) + self.assertEqual(service_name, expected_service_name) + self.assertEqual(service_type, expected_service_type) + + @mock.patch("geonode.harvesting.harvesters.arcgis.arcrest") + def test_get_resource_extractor(self, mock_arcrest): + fixtures = [ + ("http://somewhere/rest/services/fakeservice1/MapServer/1", mock_arcrest.MapService, arcgis.ArcgisMapServiceResourceExtractor), + ("http://somewhere/rest/services/fakeservice1/ImageServer/1", mock_arcrest.ImageService, arcgis.ArcgisImageServiceResourceExtractor), + ] + for identifier, mock_class, extractor_class in fixtures: + result = arcgis.get_resource_extractor(identifier) + mock_class.assert_called_with(identifier) + self.assertIsInstance(result, extractor_class) diff --git a/geonode/harvesting/tests/test_harvester_worker_geonode_legacy.py b/geonode/harvesting/tests/test_harvester_worker_geonode_legacy.py index ee900898745..5e897522135 100644 --- a/geonode/harvesting/tests/test_harvester_worker_geonode_legacy.py +++ b/geonode/harvesting/tests/test_harvester_worker_geonode_legacy.py @@ -16,7 +16,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>.
# ######################################################################### -import mock.mock +from unittest import mock from django.utils import timezone diff --git a/geonode/harvesting/tests/test_harvester_worker_wms.py b/geonode/harvesting/tests/test_harvester_worker_wms.py new file mode 100644 index 00000000000..6482de3ac2a --- /dev/null +++ b/geonode/harvesting/tests/test_harvester_worker_wms.py @@ -0,0 +1,32 @@ +######################################################################### +# +# Copyright (C) 2021 OSGeo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +######################################################################### +from geonode.harvesting.harvesters import wms +from geonode.tests.base import GeoNodeBaseSimpleTestSupport + + +class WmsModuleTestCase(GeoNodeBaseSimpleTestSupport): + + def test_get_nsmap(self): + fixtures = [ + ({None: "ns1uri", "ns2": "ns2uri"}, {"wms": "ns1uri", "ns2": "ns2uri"}), + ({"ns1": "ns1uri", "ns2": "ns2uri"}, {"ns1": "ns1uri", "ns2": "ns2uri"}), + ] + for original, expected in fixtures: + result = wms._get_nsmap(original) + self.assertEqual(result, expected) diff --git a/geonode/harvesting/tests/test_tasks.py b/geonode/harvesting/tests/test_tasks.py index 89f42e9307f..66d3c76181e 100644 --- a/geonode/harvesting/tests/test_tasks.py +++ b/geonode/harvesting/tests/test_tasks.py @@ -20,7 +20,6 @@ from django.contrib.auth import get_user_model from django.utils.timezone import now - from geonode.tests.base import ( GeoNodeBaseTestSupport ) diff --git a/geonode/harvesting/tests/test_utils.py b/geonode/harvesting/tests/test_utils.py new file mode 100644 index 00000000000..9c05ff68419 --- /dev/null +++ b/geonode/harvesting/tests/test_utils.py @@ -0,0 +1,56 @@ +############################################## +# +# Copyright (C) 2021 OSGeo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +######################################################################### +from lxml import etree + +from geonode.tests.base import ( + GeoNodeBaseSimpleTestSupport, +) + +from ..
import ( + utils, +) + + +class UtilsTestCase(GeoNodeBaseSimpleTestSupport): + + def test_get_xpath_value(self): + fixtures = [ + ( + "phony", + "/ns1:myElement/ns2:anotherElement", + None, + "phony" + ), + ( + "phony", + "ns2:anotherElement", + None, + "phony" + ), + ( + "phony", + "ns2:anotherElement/ns3:additional", + None, + "phony" + ), + ] + for element, xpath_expr, nsmap, expected in fixtures: + xml_el = etree.fromstring(element) + result = utils.get_xpath_value(xml_el, xpath_expr, nsmap=nsmap) + self.assertEqual(result, expected) diff --git a/geonode/layers/enumerations.py b/geonode/layers/enumerations.py index 12c98606346..6da0a72b463 100644 --- a/geonode/layers/enumerations.py +++ b/geonode/layers/enumerations.py @@ -34,3 +34,14 @@ 'xsd:unsignedShort', 'xsd:unsignedByte', ] +GXP_PTYPES = { + 'AUTO': 'gxp_wmscsource', + 'OWS': 'gxp_wmscsource', + 'WMS': 'gxp_wmscsource', + 'WFS': 'gxp_wmscsource', + 'WCS': 'gxp_wmscsource', + 'REST_MAP': 'gxp_arcrestsource', + 'REST_IMG': 'gxp_arcrestsource', + 'HGL': 'gxp_hglsource', + 'GN_WMS': 'gxp_geonodecataloguesource', +} diff --git a/geonode/layers/migrations/0037_layer_ptype.py b/geonode/layers/migrations/0037_layer_ptype.py index c56ecef26fd..0a0725cf26c 100644 --- a/geonode/layers/migrations/0037_layer_ptype.py +++ b/geonode/layers/migrations/0037_layer_ptype.py @@ -1,7 +1,7 @@ # Generated by Django 3.2.4 on 2021-07-20 17:09 from django.db import migrations, models -from geonode.services.enumerations import GXP_PTYPES +from geonode.layers.enumerations import GXP_PTYPES def update_remotes_attributes(apps, schema_editor): diff --git a/geonode/layers/views.py b/geonode/layers/views.py index 4195eb7467a..ebfdd36a04c 100644 --- a/geonode/layers/views.py +++ b/geonode/layers/views.py @@ -54,6 +54,7 @@ from guardian.shortcuts import get_objects_for_user from geonode import geoserver +from geonode.layers.enumerations import GXP_PTYPES from geonode.layers.metadata import parse_metadata from geonode.resource.manager import resource_manager from geonode.geoserver.helpers import set_dataset_style @@ -574,11 +575,24 @@ def sld_definition(style): source_params=json.dumps(source_params) ) else: - maplayer = GXPLayer( - name=layer.alternate, - ows_url=layer.ows_url, - dataset_params=json.dumps(config) - ) + is_arcgis_layer = layer.ptype in (GXP_PTYPES["REST_MAP"], GXP_PTYPES["REST_IMG"]) + if is_arcgis_layer: + maplayer = GXPLayer( + name=layer.alternate, + ows_url=layer.ows_url, + dataset_params=json.dumps(config), + source_params=json.dumps({ + "ptype": layer.ptype, + "remote": True, + "url": layer.ows_url, + }) + ) + else: + maplayer = GXPLayer( + name=layer.alternate, + ows_url=layer.ows_url, + dataset_params=json.dumps(config) + ) # Update count for popularity ranking, # but do not includes admins or resource owners layer.view_count_up(request.user) diff --git a/geonode/maps/tests.py b/geonode/maps/tests.py index bde5e4f4ed7..cf6095d92a4 100644 --- a/geonode/maps/tests.py +++ b/geonode/maps/tests.py @@ -710,7 +710,7 @@ def test_map_embed(self, thumbnail_mock): self.assertEqual(map_obj.title, title) self.assertEqual(map_obj.abstract, abstract) - self.assertEqual(map_obj.zoom, 6) + self.assertEqual(map_obj.zoom, 7) self.assertEqual(map_obj.projection, projection) @patch('geonode.thumbs.thumbnails.create_thumbnail') @@ -771,7 +771,7 @@ def test_map_view(self, thumbnail_mock): self.assertEqual(map_obj.title, title) self.assertEqual(map_obj.abstract, abstract) - self.assertEqual(map_obj.zoom, 6) + self.assertEqual(map_obj.zoom, 7) 
self.assertEqual(map_obj.projection, projection) for map_dataset in map_obj.datasets: diff --git a/geonode/resource/manager.py b/geonode/resource/manager.py index c13a5b2e730..e608a4909ea 100644 --- a/geonode/resource/manager.py +++ b/geonode/resource/manager.py @@ -315,8 +315,6 @@ def create(self, uuid: str, /, resource_type: typing.Optional[object] = None, de with transaction.atomic(): _resource.set_missing_info() _resource = self._concrete_resource_manager.create(uuid, resource_type=resource_type, defaults=defaults) - if _resource.bbox_polygon and not _resource.ll_bbox_polygon: - _resource.set_bounds_from_bbox(_resource.bbox_polygon, _resource.srid) _resource.set_processing_state(enumerations.STATE_PROCESSED) except Exception as e: logger.exception(e) @@ -355,8 +353,6 @@ def update(self, uuid: str, /, instance: ResourceBase = None, xml_file: str = No _resource.save() _resource = update_resource(instance=_resource.get_real_instance(), regions=regions, keywords=keywords, vals=vals) _resource = self._concrete_resource_manager.update(uuid, instance=_resource, notify=notify) - if _resource.bbox_polygon and not _resource.ll_bbox_polygon: - _resource.set_bounds_from_bbox(_resource.bbox_polygon, _resource.srid) _resource = metadata_storers(_resource.get_real_instance(), custom) # The following is only a demo proof of concept for a pluggable WF subsystem diff --git a/geonode/resource/utils.py b/geonode/resource/utils.py index a3a211ed13e..11d25e0efcb 100644 --- a/geonode/resource/utils.py +++ b/geonode/resource/utils.py @@ -401,6 +401,7 @@ def dataset_post_save(instance, *args, **kwargs): def metadata_post_save(instance, *args, **kwargs): logger.debug("handling UUID In pre_save_dataset") + defaults = {} if isinstance(instance, Dataset) and hasattr(settings, 'LAYER_UUID_HANDLER') and settings.LAYER_UUID_HANDLER != '': logger.debug("using custom uuid handler In pre_save_dataset") from ..layers.utils import get_uuid_handler @@ -409,14 +410,6 @@ def metadata_post_save(instance, *args, **kwargs): instance.uuid = _uuid Dataset.objects.filter(id=instance.id).update(uuid=_uuid) - # Fixup bbox - if instance.bbox_polygon is None: - instance.set_bbox_polygon((-180, -90, 180, 90), 'EPSG:4326') - instance.set_bounds_from_bbox( - instance.bbox_polygon, - instance.srid or instance.bbox_polygon.srid - ) - # Set a default user for accountstream to work correctly. 
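# Without an owner, activity-stream entries cannot be attributed, so get_valid_user() supplies a fallback account below. Note also that the collected defaults are later written via a queryset .update(), which bypasses model save() and so does not re-trigger post-save signals.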
if instance.owner is None: instance.owner = get_valid_user() @@ -430,17 +423,44 @@ def metadata_post_save(instance, *args, **kwargs): if license and len(license) > 0: instance.license = license[0] - instance.thumbnail_url = instance.get_thumbnail_url() + instance.thumbnail_url = instance.get_real_instance().get_thumbnail_url() instance.csw_insert_date = datetime.datetime.now(timezone.get_current_timezone()) instance.set_missing_info() - ResourceBase.objects.filter(id=instance.id).update( + defaults = dict( uuid=instance.uuid, - srid=instance.srid, + owner=instance.owner, + license=instance.license, alternate=instance.alternate, - bbox_polygon=instance.bbox_polygon, - thumbnail_url=instance.get_thumbnail_url(), - csw_insert_date=datetime.datetime.now(timezone.get_current_timezone()) + thumbnail_url=instance.thumbnail_url, + csw_insert_date=instance.csw_insert_date + ) + + # Fixup bbox + if instance.bbox_polygon is None: + instance.set_bbox_polygon((-180, -90, 180, 90), 'EPSG:4326') + defaults.update( + dict( + srid='EPSG:4326', + bbox_polygon=instance.bbox_polygon, + ll_bbox_polygon=instance.ll_bbox_polygon + ) + ) + if instance.ll_bbox_polygon is None: + instance.set_bounds_from_bbox( + instance.bbox_polygon, + instance.srid or instance.bbox_polygon.srid + ) + defaults.update( + dict( + srid=instance.srid, + bbox_polygon=instance.bbox_polygon, + ll_bbox_polygon=instance.ll_bbox_polygon + ) + ) + + ResourceBase.objects.filter(id=instance.id).update( + **defaults ) try: diff --git a/geonode/services/enumerations.py b/geonode/services/enumerations.py index 65552ae652c..67fa84f9b1c 100644 --- a/geonode/services/enumerations.py +++ b/geonode/services/enumerations.py @@ -45,29 +45,19 @@ (OWS, _('Paired WMS/WFS/WCS')), (WMS, _('Web Map Service')), # (CSW, _('Catalogue Service')), - # (REST_MAP, _('ArcGIS REST MapServer')), - # (REST_IMG, _('ArcGIS REST ImageServer')), + (REST_MAP, _('ArcGIS REST MapServer')), + (REST_IMG, _('ArcGIS REST ImageServer')), # (OGP, _('OpenGeoPortal')), # (HGL, _('Harvard Geospatial Library')), (GN_WMS, _('GeoNode (Web Map Service)')), # (GN_CSW, _('GeoNode (Catalogue Service)')), ) -GXP_PTYPES = { - 'AUTO': 'gxp_wmscsource', - 'OWS': 'gxp_wmscsource', - 'WMS': 'gxp_wmscsource', - 'WFS': 'gxp_wmscsource', - 'WCS': 'gxp_wmscsource', - 'REST_MAP': 'gxp_arcrestsource', - 'REST_IMG': 'gxp_arcrestsource', - 'HGL': 'gxp_hglsource', - 'GN_WMS': 'gxp_geonodecataloguesource', -} - HARVESTER_TYPES = { 'WMS': 'geonode.harvesting.harvesters.wms.OgcWmsHarvester', - 'GN_WMS': 'geonode.harvesting.harvesters.geonodeharvester.GeonodeLegacyHarvester', + 'GN_WMS': 'geonode.harvesting.harvesters.geonodeharvester.GeonodeUnifiedHarvesterWorker', + 'REST_MAP': 'geonode.harvesting.harvesters.arcgis.ArcgisHarvesterWorker', + 'REST_IMG': 'geonode.harvesting.harvesters.arcgis.ArcgisHarvesterWorker', } QUEUED = "QUEUED" diff --git a/geonode/services/forms.py b/geonode/services/forms.py index e746e964a9a..50950b12b04 100644 --- a/geonode/services/forms.py +++ b/geonode/services/forms.py @@ -54,8 +54,8 @@ class CreateServiceForm(forms.Form): (enumerations.GN_WMS, _('GeoNode (Web Map Service)')), # (enumerations.GN_CSW, _('GeoNode (Catalogue Service)')), # (enumerations.CSW, _('Catalogue Service')), - # (enumerations.REST_MAP, _('ArcGIS REST MapServer')), - # (enumerations.REST_IMG, _('ArcGIS REST ImageServer')), + (enumerations.REST_MAP, _('ArcGIS REST MapServer')), + (enumerations.REST_IMG, _('ArcGIS REST ImageServer')), # (enumerations.OGP, _('OpenGeoPortal')), # (enumerations.HGL, _('Harvard 
Geospatial Library')), ), diff --git a/geonode/services/migrations/0053_alter_service_type.py b/geonode/services/migrations/0053_alter_service_type.py new file mode 100644 index 00000000000..20eead9dc41 --- /dev/null +++ b/geonode/services/migrations/0053_alter_service_type.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.7 on 2021-10-22 13:33 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('services', '0052_auto_20211004_1635'), + ] + + operations = [ + migrations.AlterField( + model_name='service', + name='type', + field=models.CharField(choices=[('OWS', 'Paired WMS/WFS/WCS'), ('WMS', 'Web Map Service'), ('REST_MAP', 'ArcGIS REST MapServer'), ('REST_IMG', 'ArcGIS REST ImageServer'), ('GN_WMS', 'GeoNode (Web Map Service)')], max_length=100), + ), + ] diff --git a/geonode/services/models.py b/geonode/services/models.py index 3630a4d5f04..2f304b536e7 100644 --- a/geonode/services/models.py +++ b/geonode/services/models.py @@ -28,6 +28,7 @@ from geonode.base.models import ResourceBase from geonode.harvesting.models import Harvester +from geonode.layers.enumerations import GXP_PTYPES from geonode.people.enumerations import ROLE_VALUES from . import enumerations @@ -120,7 +121,7 @@ def service_url(self): @property def ptype(self): # Return the gxp ptype that should be used to display layers - return enumerations.GXP_PTYPES[self.type] if self.type else None + return GXP_PTYPES[self.type] if self.type else None @property def service_type(self): diff --git a/geonode/services/serviceprocessors/arcgis.py b/geonode/services/serviceprocessors/arcgis.py index 056f566c00a..6620c246c44 100644 --- a/geonode/services/serviceprocessors/arcgis.py +++ b/geonode/services/serviceprocessors/arcgis.py @@ -31,7 +31,7 @@ from geonode.layers.models import Dataset from geonode.base.bbox_utils import BBOXHelper -# from geonode.harvesting.models import Harvester +from geonode.harvesting.models import Harvester from arcrest import MapService as ArcMapService, ImageService as ArcImageService @@ -91,6 +91,12 @@ def __init__(self, url, geonode_service_id=None): def parsed_service(self): return ArcMapService(self.url) + def probe(self): + try: + return True if len(self.parsed_service._json_struct) > 0 else False + except Exception: + return False + def create_cascaded_store(self, service): return None @@ -115,16 +121,16 @@ def create_geonode_service(self, owner): abstract=str(self.parsed_service._json_struct.get("serviceDescription")).encode("utf-8", "ignore").decode('utf-8') or _( "Not provided") ) - # TODO: once the ArcGIS Harvester will be available - # service_harvester = Harvester.objects.create( - # name=self.name, - # default_owner=owner, - # remote_url=instance.service_url, - # harvester_type=enumerations.HARVESTER_TYPES[self.type] - # ) - # service_harvester.update_availability() - # service_harvester.initiate_update_harvestable_resources() - # instance.harvester = service_harvester + service_harvester = Harvester.objects.create( + name=self.name, + default_owner=owner, + remote_url=instance.service_url, + harvester_type=enumerations.HARVESTER_TYPES[self.service_type], + harvester_type_specific_configuration=self.get_harvester_configuration_options() + ) + service_harvester.update_availability() + service_harvester.initiate_update_harvestable_resources() + instance.harvester = service_harvester self.geonode_service_id = instance.id return instance @@ -132,6 +138,12 @@ def create_geonode_service(self, owner): def get_keywords(self): return 
self.parsed_service._json_struct.get("capabilities", "").split(",") + def get_harvester_configuration_options(self): + return { + "harvest_map_services": True, + "harvest_image_services": False + } + def _parse_datasets(self, layers): map_datasets = [] for lyr in layers: @@ -242,3 +254,9 @@ def __init__(self, url): @property def parsed_service(self): return ArcImageService(self.url) + + def get_harvester_configuration_options(self): + return { + "harvest_map_services": False, + "harvest_image_services": True + } diff --git a/geonode/services/serviceprocessors/handler.py b/geonode/services/serviceprocessors/handler.py index 835761f18bd..6160ef4afb1 100644 --- a/geonode/services/serviceprocessors/handler.py +++ b/geonode/services/serviceprocessors/handler.py @@ -24,7 +24,7 @@ from collections import OrderedDict from .. import enumerations -# from .arcgis import ArcMapServiceHandler, ArcImageServiceHandler +from .arcgis import ArcMapServiceHandler, ArcImageServiceHandler from .wms import WmsServiceHandler, GeoNodeServiceHandler # noqa logger = logging.getLogger(__name__) @@ -39,8 +39,8 @@ def get_service_handler(base_url, service_type=enumerations.AUTO, service_id=Non enumerations.GN_WMS: {"OWS": True, "handler": GeoNodeServiceHandler}, # enumerations.WFS: {"OWS": True, "handler": ServiceHandlerBase}, # enumerations.TMS: {"OWS": False, "handler": ServiceHandlerBase}, - # enumerations.REST_MAP: {"OWS": False, "handler": ArcMapServiceHandler}, - # enumerations.REST_IMG: {"OWS": False, "handler": ArcImageServiceHandler}, + enumerations.REST_MAP: {"OWS": False, "handler": ArcMapServiceHandler}, + enumerations.REST_IMG: {"OWS": False, "handler": ArcImageServiceHandler}, # enumerations.CSW: {"OWS": False, "handler": ServiceHandlerBase}, # enumerations.HGL: {"OWS": True, "handler": ServiceHandlerBase}, # TODO: verify this # enumerations.OGP: {"OWS": False, "handler": ServiceHandlerBase}, # TODO: verify this diff --git a/geonode/settings.py b/geonode/settings.py index d8f80770260..efc8459bd9b 100644 --- a/geonode/settings.py +++ b/geonode/settings.py @@ -704,7 +704,7 @@ selenium_tests = ast.literal_eval(os.environ.get('TEST_RUN_SELENIUM', 'False')) # Django 1.11 ParallelTestSuite -TEST_RUNNER = 'geonode.tests.suite.runner.GeoNodeBaseSuiteDiscoverRunner' +# TEST_RUNNER = 'geonode.tests.suite.runner.GeoNodeBaseSuiteDiscoverRunner' TEST_RUNNER_KEEPDB = os.environ.get('TEST_RUNNER_KEEPDB', 0) TEST_RUNNER_PARALLEL = os.environ.get('TEST_RUNNER_PARALLEL', 1) diff --git a/geonode/thumbs/tests/test_unit.py b/geonode/thumbs/tests/test_unit.py index 2ed280f7f62..d0589a7ef87 100644 --- a/geonode/thumbs/tests/test_unit.py +++ b/geonode/thumbs/tests/test_unit.py @@ -133,7 +133,7 @@ def test_datasets_locations_dataset(self): self.assertEqual(locations, [[settings.OGC_SERVER["default"]["LOCATION"], [dataset.alternate], []]]) def test_datasets_locations_dataset_default_bbox(self): - expected_bbox = [-8238681.428369759, -8220320.787127878, 4969844.155936863, 4984363.9488296695, "EPSG:3857"] + expected_bbox = [-8238681.374829309, -8220320.783295829, 4969844.093033709, 4984363.884452854, "EPSG:3857"] dataset = Dataset.objects.get(title_en="theaters_nyc") locations, bbox = thumbnails._datasets_locations(dataset, compute_bbox=True) @@ -169,7 +169,7 @@ def test_datasets_locations_simple_map(self): self.assertEqual(locations, [[settings.OGC_SERVER["default"]["LOCATION"], ["geonode:Meteorite_Landings_from_NASA_Open_Data_Portal1", dataset.alternate], ["test_style", "theaters_nyc"]]]) def 
test_datasets_locations_simple_map_default_bbox(self): - expected_bbox = [-8238681.428369759, -8220320.787127878, 4969844.155936863, 4984363.9488296695, "EPSG:3857"] + expected_bbox = [-8238681.374829309, -8220320.783295829, 4969844.093033709, 4984363.884452854, "EPSG:3857"] dataset = Dataset.objects.get(title_en="theaters_nyc") map = Map.objects.get(title_en="theaters_nyc_map") @@ -181,7 +181,7 @@ def test_datasets_locations_simple_map_default_bbox(self): self.assertEqual(locations, [[settings.OGC_SERVER["default"]["LOCATION"], [dataset.alternate], ["theaters_nyc"]]]) def test_datasets_locations_composition_map_default_bbox(self): - expected_bbox = [-18411664.521739896, 1414810.0631394347, -20040289.59992574, 16329038.485056708, 'EPSG:3857'] + expected_bbox = [-20033947.41086791, 1414810.0631394347, -20041642.2309585, 16329038.485056704, 'EPSG:3857'] expected_locations = [ [ settings.GEOSERVER_LOCATION, diff --git a/geonode/thumbs/utils.py b/geonode/thumbs/utils.py index 49d3fd1dd4f..593dbab9970 100644 --- a/geonode/thumbs/utils.py +++ b/geonode/thumbs/utils.py @@ -16,12 +16,12 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # ######################################################################### - +import re import time import base64 import logging -from pyproj import Transformer, CRS +from pyproj import CRS from owslib.wms import WebMapService from typing import List, Tuple, Callable, Union @@ -30,7 +30,9 @@ from geonode.maps.models import Map from geonode.layers.models import Dataset -from geonode.utils import OGC_Servers_Handler +from geonode.utils import ( + bbox_to_projection, + OGC_Servers_Handler) from geonode.base.auth import get_or_create_token from geonode.thumbs.exceptions import ThumbnailError @@ -72,11 +74,9 @@ def transform_bbox(bbox: List, target_crs: str = "EPSG:3857"): Function transforming BBOX in dataset compliant format (xmin, xmax, ymin, ymax, 'EPSG:xxxx') to another CRS, preserving overflow values. """ - transformer = Transformer.from_crs(bbox[-1].lower(), target_crs.lower(), always_xy=True) - x_min, y_min = transformer.transform(bbox[0], bbox[2]) - x_max, y_max = transformer.transform(bbox[1], bbox[3]) - - return [x_min, x_max, y_min, y_max, target_crs] + match = re.match(r'^(EPSG:)?(?P<srid>\d{4,6})$', str(target_crs)) + target_srid = int(match.group('srid')) if match else 4326 + return list(bbox_to_projection(bbox, target_srid=target_srid))[:-1] + [target_crs] def expand_bbox_to_ratio( diff --git a/geonode/utils.py b/geonode/utils.py index d7e7bd15752..48b5df3cdb9 100755 --- a/geonode/utils.py +++ b/geonode/utils.py @@ -534,7 +534,8 @@ def bbox_to_projection(native_bbox, target_srid=4326): g.Transform(CoordinateTransformation(source, dest)) projected_bbox = [str(x) for x in g.GetEnvelope()] # Must be in the form : [x0, x1, y0, y1, EPSG:<srid>) - return tuple([projected_bbox[0], projected_bbox[1], projected_bbox[2], projected_bbox[3]]) + \ + return tuple( + [float(projected_bbox[0]), float(projected_bbox[1]), float(projected_bbox[2]), float(projected_bbox[3])]) + \ (f"EPSG:{target_srid}",) except Exception as e: logger.exception(e)
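As a closing note on the `transform_bbox` rework above: the function now only parses the target CRS itself and delegates the actual reprojection to `bbox_to_projection`. A minimal standalone sketch of just that parsing step (`parse_target_srid` is a hypothetical name introduced here for illustration; the regex mirrors the one in geonode/thumbs/utils.py):

import re

def parse_target_srid(target_crs: str, fallback: int = 4326) -> int:
    # Accepts "EPSG:3857" or a bare code such as "3857"; anything that does
    # not match the optional "EPSG:" prefix plus a 4-6 digit code falls back
    # to EPSG:4326, matching the behaviour of the patched function.
    match = re.match(r'^(EPSG:)?(?P<srid>\d{4,6})$', str(target_crs))
    return int(match.group('srid')) if match else fallback

assert parse_target_srid("EPSG:3857") == 3857
assert parse_target_srid("4326") == 4326
assert parse_target_srid("urn:ogc:def:crs:EPSG::3857") == 4326  # no match -> fallback

URN-style identifiers and other non-EPSG strings therefore silently degrade to 4326 rather than raising, which is what lets the thumbnail code keep working with loosely formatted CRS inputs.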