From e32452724c89c502c1b8ecc4d2f4618d9908eb9b Mon Sep 17 00:00:00 2001
From: Mattia
Date: Wed, 22 May 2024 13:20:11 +0200
Subject: [PATCH 01/13] [Fixes #242] CRS parsing is not correctly handled for
 CSV files

---
 importer/handlers/csv/handler.py       |  9 +++++-
 importer/tests/end2end/test_end2end.py | 43 ++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/importer/handlers/csv/handler.py b/importer/handlers/csv/handler.py
index d805883a..67b338f8 100644
--- a/importer/handlers/csv/handler.py
+++ b/importer/handlers/csv/handler.py
@@ -244,9 +244,16 @@ def extract_resource_to_publish(
             {
                 "name": alternate or layer_name,
                 "crs": (
-                    self.identify_authority(_l) if _l.GetSpatialRef() else "EPSG:4326"
+                    self.identify_authority(_l)
                 ),
             }
             for _l in layers
             if self.fixup_name(_l.GetName()) == layer_name
         ]
+
+    def identify_authority(self, layer):
+        try:
+            authority_code = super().identify_authority(layer=layer)
+            return authority_code
+        except Exception:
+            return "EPSG:4326"
diff --git a/importer/tests/end2end/test_end2end.py b/importer/tests/end2end/test_end2end.py
index 0cc3001b..a042e270 100644
--- a/importer/tests/end2end/test_end2end.py
+++ b/importer/tests/end2end/test_end2end.py
@@ -37,6 +37,7 @@ def setUpClass(cls) -> None:
         }
         cls.valid_kml = f"{project_dir}/tests/fixture/valid.kml"
         cls.valid_tif = f"{project_dir}/tests/fixture/test_grid.tif"
+        cls.valid_csv = f"{project_dir}/tests/fixture/valid.csv"
         cls.url = reverse("importer_upload")

         ogc_server_settings = OGC_Servers_Handler(settings.OGC_SERVER)["default"]
@@ -258,6 +259,48 @@ def test_import_geojson_overwrite(self):
             self.cat.delete(layer)


+class ImporterCSVImportTest(BaseImporterEndToEndTest):
+    @mock.patch.dict(os.environ, {"GEONODE_GEODATABASE": "test_geonode_data"})
+    @override_settings(
+        GEODATABASE_URL=f"{geourl.split('/geonode_data')[0]}/test_geonode_data"
+    )
+    def test_import_csv(self):
+        layer = self.cat.get_layer("geonode:valid")
+        if layer:
+            self.cat.delete(layer)
+
+        payload = {
+            "base_file": open(self.valid_csv, "rb"),
+        }
+        initial_name = "valid"
+        self._assertimport(payload, initial_name)
+        layer = self.cat.get_layer("geonode:valid")
+        if layer:
+            self.cat.delete(layer)
+
+    @mock.patch.dict(os.environ, {"GEONODE_GEODATABASE": "test_geonode_data"})
+    @override_settings(
+        GEODATABASE_URL=f"{geourl.split('/geonode_data')[0]}/test_geonode_data"
+    )
+    def test_import_csv_overwrite(self):
+        prev_dataset = create_single_dataset(name="valid")
+
+        layer = self.cat.get_layer("geonode:valid")
+        if layer:
+            self.cat.delete(layer)
+        payload = {
+            "base_file": open(self.valid_csv, "rb"),
+        }
+        initial_name = "valid"
+        payload["overwrite_existing_layer"] = True
+        self._assertimport(
+            payload, initial_name, overwrite=True, last_update=prev_dataset.last_updated
+        )
+        layer = self.cat.get_layer("geonode:valid")
+        if layer:
+            self.cat.delete(layer)
+
+
 class ImporterKMLImportTest(BaseImporterEndToEndTest):
     @mock.patch.dict(os.environ, {"GEONODE_GEODATABASE": "test_geonode_data"})
     @override_settings(
From 4092e1b30ed86edabca49d3d098bead49f00f5ad Mon Sep 17 00:00:00 2001
From: "G.
Allegri" Date: Thu, 23 May 2024 09:55:35 +0200 Subject: [PATCH 02/13] bump version --- importer/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/importer/__init__.py b/importer/__init__.py index e3c65827..4138c65c 100644 --- a/importer/__init__.py +++ b/importer/__init__.py @@ -20,7 +20,7 @@ project_dir = os.path.dirname(os.path.abspath(__file__)) -VERSION = (1, 0, 10) +VERSION = (1, 1, 0) __version__ = ".".join([str(i) for i in VERSION]) __author__ = "geosolutions-it" __email__ = "info@geosolutionsgroup.com" From caa720047b6d49080483bbd382679a8fbe16261b Mon Sep 17 00:00:00 2001 From: Mattia Date: Thu, 23 May 2024 15:23:00 +0200 Subject: [PATCH 03/13] Let importer create the asset --- importer/api/views.py | 39 ++++++++++++++++++++++-- importer/celery_tasks.py | 10 ++++-- importer/handlers/base.py | 8 ++--- importer/handlers/common/raster.py | 2 +- importer/handlers/common/tests_vector.py | 2 +- importer/handlers/common/vector.py | 17 +++-------- importer/handlers/csv/handler.py | 4 +-- importer/handlers/sld/tests.py | 5 ++- importer/handlers/xml/tests.py | 5 ++- importer/tests/unit/test_task.py | 2 +- 10 files changed, 64 insertions(+), 30 deletions(-) diff --git a/importer/api/views.py b/importer/api/views.py index 0d71b5eb..a5bd7a10 100644 --- a/importer/api/views.py +++ b/importer/api/views.py @@ -17,6 +17,8 @@ # ######################################################################### import logging +import os +from pathlib import Path from urllib.parse import urljoin from django.conf import settings from django.urls import reverse @@ -46,6 +48,9 @@ from rest_framework.parsers import FileUploadParser, MultiPartParser from rest_framework.permissions import IsAuthenticatedOrReadOnly from rest_framework.response import Response +from geonode.assets.handlers import asset_handler_registry +from geonode.utils import get_allowed_extensions +from geonode.assets.local import LocalAssetHandler logger = logging.getLogger(__name__) @@ -91,6 +96,8 @@ def create(self, request, *args, **kwargs): """ _file = request.FILES.get("base_file") or request.data.get("base_file") execution_id = None + asset_handler = LocalAssetHandler() + asset_dir = asset_handler._create_asset_dir() serializer = self.get_serializer_class() data = serializer(data=request.data) @@ -111,7 +118,9 @@ def create(self, request, *args, **kwargs): remote_files={"base_file": _data.get("zip_file", _data.get("kmz_file"))} ) # cloning and unzip the base_file - storage_manager.clone_remote_files() + storage_manager.clone_remote_files( + cloning_directory=asset_dir, create_tempdir=False + ) # update the payload with the unziped paths _data.update(storage_manager.get_retrieved_paths()) @@ -125,9 +134,13 @@ def create(self, request, *args, **kwargs): # means that the storage manager is not initialized yet, so # the file is not a zip storage_manager = StorageManager(remote_files=_data) - storage_manager.clone_remote_files() + storage_manager.clone_remote_files( + cloning_directory=asset_dir, create_tempdir=False + ) # get filepath - files = storage_manager.get_retrieved_paths() + asset, files = self.generate_asset_and_retrieve_paths( + request, storage_manager, handler + ) upload_validator = UploadLimitValidator(request.user) upload_validator.validate_parallelism_limit_per_user() @@ -144,6 +157,10 @@ def create(self, request, *args, **kwargs): input_params={ **{"files": files, "handler_module_path": str(handler)}, **extracted_params, + **{ + "asset_id": asset.id, + "asset_module_path": 
f"{asset.__module__}.{asset.__class__.__name__}", + }, }, legacy_upload_name=_file.name, action=action, @@ -161,6 +178,8 @@ def create(self, request, *args, **kwargs): # cloned files to keep the storage under control if storage_manager is not None: storage_manager.delete_retrieved_paths(force=True) + if asset: + asset.objects.delete() if execution_id: orchestrator.set_as_failed(execution_id=str(execution_id), reason=e) logger.exception(e) @@ -168,6 +187,20 @@ def create(self, request, *args, **kwargs): raise ImportException(detail="No handlers found for this dataset type") + def generate_asset_and_retrieve_paths(self, request, storage_manager, handler): + asset_handler = asset_handler_registry.get_default_handler() + _files = storage_manager.get_retrieved_paths() + asset = asset_handler.create( + title="Original", + owner=request.user, + description=None, + type=str(handler), + files=list(set(_files.values())), + clone_files=False, + ) + + return asset, _files + class ResourceImporter(DynamicModelViewSet): authentication_classes = [ diff --git a/importer/celery_tasks.py b/importer/celery_tasks.py index c7cfb2f9..a86d3da4 100644 --- a/importer/celery_tasks.py +++ b/importer/celery_tasks.py @@ -329,6 +329,12 @@ def create_geonode_resource( _files = _exec.input_params.get("files") + _asset = ( + import_string(_exec.input_params.get("asset_module_path")) + .objects.filter(id=_exec.input_params.get("asset_id")) + .first() + ) + handler = import_string(handler_module_path)() _overwrite = _exec.input_params.get("overwrite_existing_layer") @@ -337,14 +343,14 @@ def create_geonode_resource( layer_name=layer_name, alternate=alternate, execution_id=execution_id, - files=_files, + asset=_asset, ) else: resource = handler.create_geonode_resource( layer_name=layer_name, alternate=alternate, execution_id=execution_id, - files=_files, + asset=_asset, ) if _overwrite: diff --git a/importer/handlers/base.py b/importer/handlers/base.py index 6e29446e..43c1041f 100644 --- a/importer/handlers/base.py +++ b/importer/handlers/base.py @@ -150,12 +150,12 @@ def perform_last_step(execution_id): ] _exec.output_params.update({"resources": resource_output_params}) _exec.save() - + # since the original file is now available as asset, we can delete the input files # TODO must be improved. The asset should be created in the beginning - for _file in _exec.input_params.get("files", {}).values(): - if storage_manager.exists(_file): - storage_manager.delete(_file) + # for _file in _exec.input_params.get("files", {}).values(): + # if storage_manager.exists(_file): + # storage_manager.delete(_file) return _exec diff --git a/importer/handlers/common/raster.py b/importer/handlers/common/raster.py index ae9cd968..4f261020 100644 --- a/importer/handlers/common/raster.py +++ b/importer/handlers/common/raster.py @@ -312,7 +312,7 @@ def create_geonode_resource( alternate: str, execution_id: str, resource_type: Dataset = Dataset, - files=None, + asset=None, ): """ Base function to create the resource into geonode. 
Each handler can specify diff --git a/importer/handlers/common/tests_vector.py b/importer/handlers/common/tests_vector.py index a2fe8efb..300b09e9 100644 --- a/importer/handlers/common/tests_vector.py +++ b/importer/handlers/common/tests_vector.py @@ -328,7 +328,7 @@ def test_select_valid_layers(self): self.assertEqual(1, len(valid_layer)) self.assertEqual("mattia_test", valid_layer[0].GetName()) - @override_settings(MEDIA_ROOT='/tmp') + @override_settings(MEDIA_ROOT="/tmp") def test_perform_last_step(self): """ Output params in perform_last_step should return the detail_url and the ID diff --git a/importer/handlers/common/vector.py b/importer/handlers/common/vector.py index d6b6d3a0..a6e9d526 100644 --- a/importer/handlers/common/vector.py +++ b/importer/handlers/common/vector.py @@ -228,8 +228,7 @@ def perform_last_step(execution_id): # which delete the file from the filesystem for asset in assets: asset.delete() - - + def extract_resource_to_publish( self, files, action, layer_name, alternate, **kwargs ): @@ -568,7 +567,7 @@ def create_geonode_resource( alternate: str, execution_id: str, resource_type: Dataset = Dataset, - files=None, + asset=None, ): """ Base function to create the resource into geonode. Each handler can specify @@ -591,6 +590,7 @@ def create_geonode_resource( logger.warning( f"The dataset required {alternate} does not exists, but an overwrite is required, the resource will be created" ) + saved_dataset = resource_manager.create( None, resource_type=resource_type, @@ -603,16 +603,7 @@ def create_geonode_resource( dirty_state=True, title=layer_name, owner=_exec.user, - data_title="Original", - data_type=self.supported_file_extension_config["label"], - extension=self.supported_file_extension_config["id"], - link_type="uploaded", # should be in geonode.base.enumerations.LINK_TYPES - files=list( - set( - list(_exec.input_params.get("files", {}).values()) - or list(files) - ) - ), + asset=asset, ), ) diff --git a/importer/handlers/csv/handler.py b/importer/handlers/csv/handler.py index 67b338f8..f1433ed2 100644 --- a/importer/handlers/csv/handler.py +++ b/importer/handlers/csv/handler.py @@ -243,9 +243,7 @@ def extract_resource_to_publish( return [ { "name": alternate or layer_name, - "crs": ( - self.identify_authority(_l) - ), + "crs": (self.identify_authority(_l)), } for _l in layers if self.fixup_name(_l.GetName()) == layer_name diff --git a/importer/handlers/sld/tests.py b/importer/handlers/sld/tests.py index ace68be0..26f0eb99 100644 --- a/importer/handlers/sld/tests.py +++ b/importer/handlers/sld/tests.py @@ -24,7 +24,10 @@ def setUpClass(cls): cls.user, _ = get_user_model().objects.get_or_create(username="admin") cls.invalid_files = {"base_file": cls.invalid_sld, "sld_file": cls.invalid_sld} - cls.valid_files = {"base_file": "/tmp/test_sld.sld", "sld_file": "/tmp/test_sld.sld"} + cls.valid_files = { + "base_file": "/tmp/test_sld.sld", + "sld_file": "/tmp/test_sld.sld", + } cls.owner = get_user_model().objects.first() cls.layer = create_single_dataset(name="sld_dataset", owner=cls.owner) diff --git a/importer/handlers/xml/tests.py b/importer/handlers/xml/tests.py index 9262f7c8..8f51e3cc 100644 --- a/importer/handlers/xml/tests.py +++ b/importer/handlers/xml/tests.py @@ -23,7 +23,10 @@ def setUpClass(cls): shutil.copy(cls.valid_xml, "/tmp") cls.user, _ = get_user_model().objects.get_or_create(username="admin") cls.invalid_files = {"base_file": cls.invalid_xml, "xml_file": cls.invalid_xml} - cls.valid_files = {"base_file": "/tmp/test_xml.xml", "xml_file": 
"/tmp/test_xml.xml"} + cls.valid_files = { + "base_file": "/tmp/test_xml.xml", + "xml_file": "/tmp/test_xml.xml", + } cls.owner = get_user_model().objects.first() cls.layer = create_single_dataset(name="extruded_polygon", owner=cls.owner) diff --git a/importer/tests/unit/test_task.py b/importer/tests/unit/test_task.py index 8d37b219..9b49b483 100644 --- a/importer/tests/unit/test_task.py +++ b/importer/tests/unit/test_task.py @@ -506,7 +506,7 @@ def test_import_metadata_should_work_as_expected(self): layer.refresh_from_db() self.assertEqual(layer.title, "test_dataset") - #verify that the original has been deleted + # verify that the original has been deleted self.assertFalse(os.path.exists(xml_in_tmp)) From 437346a76d51a3f7706c132f37d148c67d45bb84 Mon Sep 17 00:00:00 2001 From: Mattia Date: Thu, 23 May 2024 15:27:21 +0200 Subject: [PATCH 04/13] Let importer create the asset --- importer/handlers/common/raster.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/importer/handlers/common/raster.py b/importer/handlers/common/raster.py index 4f261020..522a2830 100644 --- a/importer/handlers/common/raster.py +++ b/importer/handlers/common/raster.py @@ -335,6 +335,7 @@ def create_geonode_resource( logger.warning( f"The dataset required {alternate} does not exists, but an overwrite is required, the resource will be created" ) + saved_dataset = resource_manager.create( None, resource_type=resource_type, @@ -346,16 +347,7 @@ def create_geonode_resource( dirty_state=True, title=layer_name, owner=_exec.user, - extension=self.supported_file_extension_config["id"], - data_title="Original", - data_type=self.supported_file_extension_config["label"], - link_type="uploaded", # should be in geonode.base.enumerations.LINK_TYPES - files=list( - set( - list(_exec.input_params.get("files", {}).values()) - or list(files) - ) - ), + asset=asset ), ) From 50d2a08b8874c2d2a277f48b49e17a60bdb75f8f Mon Sep 17 00:00:00 2001 From: etj Date: Fri, 24 May 2024 09:41:44 +0200 Subject: [PATCH 05/13] Fixes --- importer/api/views.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/importer/api/views.py b/importer/api/views.py index a5bd7a10..b5d145ca 100644 --- a/importer/api/views.py +++ b/importer/api/views.py @@ -127,6 +127,7 @@ def create(self, request, *args, **kwargs): handler = orchestrator.get_handler(_data) if _file and handler: + asset = None try: # cloning data into a local folder extracted_params, _data = handler.extract_params_from_data(_data) @@ -179,7 +180,7 @@ def create(self, request, *args, **kwargs): if storage_manager is not None: storage_manager.delete_retrieved_paths(force=True) if asset: - asset.objects.delete() + asset.delete() if execution_id: orchestrator.set_as_failed(execution_id=str(execution_id), reason=e) logger.exception(e) From c2b4f2c2a9c896157cb64810ac6b614ab500bfc1 Mon Sep 17 00:00:00 2001 From: Mattia Date: Fri, 24 May 2024 15:48:57 +0200 Subject: [PATCH 06/13] Add test coverage for asset-importer --- importer/api/tests.py | 48 +++++++++++++++++++++++++++++++++++++++++++ importer/api/views.py | 2 +- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/importer/api/tests.py b/importer/api/tests.py index bcf8b357..a0f98730 100644 --- a/importer/api/tests.py +++ b/importer/api/tests.py @@ -11,6 +11,9 @@ from importer.models import ResourceHandlerInfo from importer.tests.utils import ImporterBaseTestSupport +from importer.orchestrator import orchestrator +from django.utils.module_loading import import_string +from geonode.assets.models 
import LocalAsset class TestImporterViewSet(ImporterBaseTestSupport): @@ -153,3 +156,48 @@ def test_copy_ther_resource_if_file_handler_is_set(self, _orc): self.assertEqual(200, response.status_code) _orc.s.assert_called_once() + + @patch("importer.api.views.import_orchestrator") + def test_asset_is_created_before_the_import_start(self, patch_upload): + patch_upload.apply_async.side_effect = MagicMock() + + self.client.force_login(get_user_model().objects.get(username="admin")) + payload = { + "base_file": SimpleUploadedFile( + name="test.geojson", content=b"some-content" + ), + "store_spatial_files": True, + } + + response = self.client.post(self.url, data=payload) + + self.assertEqual(201, response.status_code) + + self.assertTrue(201, response.status_code) + + _exec = orchestrator.get_execution_object(response.json()['execution_id']) + + asset_handler = import_string(_exec.input_params['asset_module_path']) + self.assertTrue(asset_handler.objects.filter(id=_exec.input_params['asset_id'])) + + asset_handler.objects.filter(id=_exec.input_params['asset_id']).delete() + + + @patch("importer.api.views.import_orchestrator") + @patch("importer.api.views.UploadLimitValidator.validate_parallelism_limit_per_user") + def test_asset_should_be_deleted_if_created_during_with_exception(self, validate_parallelism_limit_per_user, patch_upload): + patch_upload.apply_async.s.side_effect = MagicMock() + validate_parallelism_limit_per_user.side_effect = Exception("random exception") + + self.client.force_login(get_user_model().objects.get(username="admin")) + payload = { + "base_file": SimpleUploadedFile( + name="test.geojson", content=b"some-content" + ), + "store_spatial_files": True, + } + + response = self.client.post(self.url, data=payload) + + self.assertEqual(500, response.status_code) + self.assertFalse(LocalAsset.objects.exists()) diff --git a/importer/api/views.py b/importer/api/views.py index a5bd7a10..a3ecef67 100644 --- a/importer/api/views.py +++ b/importer/api/views.py @@ -179,7 +179,7 @@ def create(self, request, *args, **kwargs): if storage_manager is not None: storage_manager.delete_retrieved_paths(force=True) if asset: - asset.objects.delete() + asset.delete() if execution_id: orchestrator.set_as_failed(execution_id=str(execution_id), reason=e) logger.exception(e) From 749f085c26cb246d56c7b9d0d9287d52975c71fc Mon Sep 17 00:00:00 2001 From: Mattia Date: Fri, 24 May 2024 15:51:03 +0200 Subject: [PATCH 07/13] Add test coverage for asset-importer --- importer/api/tests.py | 17 ++++++++++------- importer/api/views.py | 3 --- importer/handlers/base.py | 7 ------- importer/handlers/common/metadata.py | 1 - importer/handlers/common/raster.py | 2 +- importer/handlers/common/vector.py | 1 - 6 files changed, 11 insertions(+), 20 deletions(-) diff --git a/importer/api/tests.py b/importer/api/tests.py index a0f98730..7c54b2ba 100644 --- a/importer/api/tests.py +++ b/importer/api/tests.py @@ -175,17 +175,20 @@ def test_asset_is_created_before_the_import_start(self, patch_upload): self.assertTrue(201, response.status_code) - _exec = orchestrator.get_execution_object(response.json()['execution_id']) + _exec = orchestrator.get_execution_object(response.json()["execution_id"]) - asset_handler = import_string(_exec.input_params['asset_module_path']) - self.assertTrue(asset_handler.objects.filter(id=_exec.input_params['asset_id'])) - - asset_handler.objects.filter(id=_exec.input_params['asset_id']).delete() + asset_handler = import_string(_exec.input_params["asset_module_path"]) + 
self.assertTrue(asset_handler.objects.filter(id=_exec.input_params["asset_id"])) + asset_handler.objects.filter(id=_exec.input_params["asset_id"]).delete() @patch("importer.api.views.import_orchestrator") - @patch("importer.api.views.UploadLimitValidator.validate_parallelism_limit_per_user") - def test_asset_should_be_deleted_if_created_during_with_exception(self, validate_parallelism_limit_per_user, patch_upload): + @patch( + "importer.api.views.UploadLimitValidator.validate_parallelism_limit_per_user" + ) + def test_asset_should_be_deleted_if_created_during_with_exception( + self, validate_parallelism_limit_per_user, patch_upload + ): patch_upload.apply_async.s.side_effect = MagicMock() validate_parallelism_limit_per_user.side_effect = Exception("random exception") diff --git a/importer/api/views.py b/importer/api/views.py index b5d145ca..d3f34968 100644 --- a/importer/api/views.py +++ b/importer/api/views.py @@ -17,8 +17,6 @@ # ######################################################################### import logging -import os -from pathlib import Path from urllib.parse import urljoin from django.conf import settings from django.urls import reverse @@ -49,7 +47,6 @@ from rest_framework.permissions import IsAuthenticatedOrReadOnly from rest_framework.response import Response from geonode.assets.handlers import asset_handler_registry -from geonode.utils import get_allowed_extensions from geonode.assets.local import LocalAssetHandler logger = logging.getLogger(__name__) diff --git a/importer/handlers/base.py b/importer/handlers/base.py index 43c1041f..e005fcd4 100644 --- a/importer/handlers/base.py +++ b/importer/handlers/base.py @@ -7,7 +7,6 @@ from importer.utils import ImporterRequestAction as ira from django_celery_results.models import TaskResult from django.db.models import Q -from geonode.storage.manager import storage_manager logger = logging.getLogger(__name__) @@ -151,12 +150,6 @@ def perform_last_step(execution_id): _exec.output_params.update({"resources": resource_output_params}) _exec.save() - # since the original file is now available as asset, we can delete the input files - # TODO must be improved. 
The asset should be created in the beginning - # for _file in _exec.input_params.get("files", {}).values(): - # if storage_manager.exists(_file): - # storage_manager.delete(_file) - return _exec def fixup_name(self, name): diff --git a/importer/handlers/common/metadata.py b/importer/handlers/common/metadata.py index 8b86db1c..14a80454 100644 --- a/importer/handlers/common/metadata.py +++ b/importer/handlers/common/metadata.py @@ -7,7 +7,6 @@ from importer.orchestrator import orchestrator from django.shortcuts import get_object_or_404 from geonode.layers.models import Dataset -from geonode.storage.manager import storage_manager logger = logging.getLogger(__name__) diff --git a/importer/handlers/common/raster.py b/importer/handlers/common/raster.py index 522a2830..419c86d6 100644 --- a/importer/handlers/common/raster.py +++ b/importer/handlers/common/raster.py @@ -347,7 +347,7 @@ def create_geonode_resource( dirty_state=True, title=layer_name, owner=_exec.user, - asset=asset + asset=asset, ), ) diff --git a/importer/handlers/common/vector.py b/importer/handlers/common/vector.py index a6e9d526..7d4b34bb 100644 --- a/importer/handlers/common/vector.py +++ b/importer/handlers/common/vector.py @@ -2,7 +2,6 @@ from django.db import connections from importer.publisher import DataPublisher from importer.utils import call_rollback_function, find_key_recursively -from itertools import chain import json import logging import os From 4e37f9de1548754a9b2de5375f99569374543775 Mon Sep 17 00:00:00 2001 From: Mattia Date: Fri, 24 May 2024 15:53:36 +0200 Subject: [PATCH 08/13] Add test coverage for asset-importer --- importer/handlers/common/vector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/importer/handlers/common/vector.py b/importer/handlers/common/vector.py index 7d4b34bb..d311b07c 100644 --- a/importer/handlers/common/vector.py +++ b/importer/handlers/common/vector.py @@ -224,7 +224,7 @@ def perform_last_step(execution_id): # getting all assets list assets = [get_default_asset(x.resource) for x in resources] # we need to loop and cancel one by one to activate the signal - # which delete the file from the filesystem + # that delete the file from the filesystem for asset in assets: asset.delete() From e1344a9f5e0122c90cfb6a2f85f025f3c725b648 Mon Sep 17 00:00:00 2001 From: Mattia Date: Fri, 24 May 2024 15:59:53 +0200 Subject: [PATCH 09/13] Add test coverage for asset-importer --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index a6fc2c4a..48d40a98 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ FROM geonode/geonode-base:latest-ubuntu-22.04 RUN rm -rf /usr/src/geonode RUN git clone https://github.com/GeoNode/geonode.git /usr/src/geonode -RUN cd /usr/src/geonode && git checkout 12124_assets && cd - +RUN cd /usr/src/geonode && git checkout 12124_assets_20240523 && cd - RUN mkdir -p /usr/src/importer RUN cd .. 
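Reviewer note (not part of the patch series): a minimal sketch of the asset-first upload flow that PATCH 03 and its follow-ups introduce in importer/api/views.py. Names mirror the diff; request, handler and storage_manager are assumed to be the objects already available inside the view's create() method.

    from geonode.assets.handlers import asset_handler_registry
    from geonode.assets.local import LocalAssetHandler

    def sketch_create_asset(request, handler, storage_manager):
        # clone the uploaded files directly into the asset directory
        # instead of a throw-away temporary folder
        asset_dir = LocalAssetHandler()._create_asset_dir()
        storage_manager.clone_remote_files(
            cloning_directory=asset_dir, create_tempdir=False
        )
        _files = storage_manager.get_retrieved_paths()

        # register the cloned files as the "Original" asset before the import starts
        asset = asset_handler_registry.get_default_handler().create(
            title="Original",
            owner=request.user,
            description=None,
            type=str(handler),
            files=list(set(_files.values())),
            clone_files=False,
        )

        # the celery pipeline later re-loads the asset from these two params;
        # on failure the view deletes the asset instead of the cloned paths
        extra_params = {
            "asset_id": asset.id,
            "asset_module_path": f"{asset.__module__}.{asset.__class__.__name__}",
        }
        return asset, _files, extra_params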
From c9c5251ed78b5ceec0ee4191a618f1a7158fa0e9 Mon Sep 17 00:00:00 2001 From: Mattia Date: Mon, 27 May 2024 14:59:20 +0200 Subject: [PATCH 10/13] Fix test coverage --- .env_test | 5 +++-- Dockerfile | 2 +- importer/handlers/README.md | 2 +- importer/handlers/common/raster.py | 8 ++++---- importer/handlers/common/vector.py | 9 +++++---- importer/tests/end2end/test_end2end.py | 3 ++- importer/tests/unit/test_task.py | 18 +++++++++++++++--- runtest.sh | 2 +- 8 files changed, 32 insertions(+), 17 deletions(-) diff --git a/.env_test b/.env_test index dcfa53ec..f751770a 100644 --- a/.env_test +++ b/.env_test @@ -178,8 +178,9 @@ DEBUG=False SECRET_KEY='myv-y4#7j-d*p-__@j#*3z@!y24fz8%^z2v6atuy4bo9vqr1_a' -STATIC_ROOT=/mnt/volumes/statics/static/ -MEDIA_ROOT=/mnt/volumes/statics/uploaded/ +STATIC_ROOT=/tmp/statics/static/ +MEDIA_ROOT=/tmp/statics/uploaded/ +ASSET_ROOT=/tmp/statics/assets/ GEOIP_PATH=/mnt/volumes/statics/geoip.db CACHE_BUSTING_STATIC_ENABLED=False diff --git a/Dockerfile b/Dockerfile index 48d40a98..083ebab5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ FROM geonode/geonode-base:latest-ubuntu-22.04 RUN rm -rf /usr/src/geonode RUN git clone https://github.com/GeoNode/geonode.git /usr/src/geonode -RUN cd /usr/src/geonode && git checkout 12124_assets_20240523 && cd - +RUN cd /usr/src/geonode && git fetch --all && git checkout 12124_assets_20240523 && cd - RUN mkdir -p /usr/src/importer RUN cd .. diff --git a/importer/handlers/README.md b/importer/handlers/README.md index 8916e951..37d982a7 100644 --- a/importer/handlers/README.md +++ b/importer/handlers/README.md @@ -158,7 +158,7 @@ class BaseVectorFileHandler(BaseHandler): return def overwrite_geonode_resource( - self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = Dataset, files=None + self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = Dataset, asset=None ): """ Base function to override the resource into geonode. 
Each handler can specify diff --git a/importer/handlers/common/raster.py b/importer/handlers/common/raster.py index 419c86d6..2990c4c5 100644 --- a/importer/handlers/common/raster.py +++ b/importer/handlers/common/raster.py @@ -369,7 +369,7 @@ def overwrite_geonode_resource( alternate: str, execution_id: str, resource_type: Dataset = Dataset, - files=None, + asset=None, ): dataset = resource_type.objects.filter(alternate__icontains=alternate) @@ -397,7 +397,7 @@ def overwrite_geonode_resource( f"The dataset required {alternate} does not exists, but an overwrite is required, the resource will be created" ) return self.create_geonode_resource( - layer_name, alternate, execution_id, resource_type, files + layer_name, alternate, execution_id, resource_type, asset ) elif not dataset.exists() and not _overwrite: logger.warning( @@ -479,9 +479,9 @@ def copy_geonode_resource( layer_name=data_to_update.get("title"), alternate=new_alternate, execution_id=str(_exec.exec_id), - files=kwargs.get("kwargs", {}) + asset=kwargs.get("kwargs", {}) .get("new_file_location", {}) - .get("files", []), + .get("asset", []), ) resource.refresh_from_db() return resource diff --git a/importer/handlers/common/vector.py b/importer/handlers/common/vector.py index d311b07c..e53a9610 100644 --- a/importer/handlers/common/vector.py +++ b/importer/handlers/common/vector.py @@ -624,7 +624,7 @@ def overwrite_geonode_resource( alternate: str, execution_id: str, resource_type: Dataset = Dataset, - files=None, + asset=None, ): dataset = resource_type.objects.filter(alternate__icontains=alternate) @@ -637,7 +637,7 @@ def overwrite_geonode_resource( dataset = dataset.first() dataset = resource_manager.update( - dataset.uuid, instance=dataset, files=files + dataset.uuid, instance=dataset, files=asset.location ) self.handle_xml_file(dataset, _exec) @@ -653,7 +653,7 @@ def overwrite_geonode_resource( f"The dataset required {alternate} does not exists, but an overwrite is required, the resource will be created" ) return self.create_geonode_resource( - layer_name, alternate, execution_id, resource_type, files + layer_name, alternate, execution_id, resource_type, asset ) elif not dataset.exists() and not _overwrite: logger.warning( @@ -731,11 +731,12 @@ def copy_geonode_resource( new_alternate: str, **kwargs, ): + new_resource = self.create_geonode_resource( layer_name=data_to_update.get("title"), alternate=new_alternate, execution_id=str(_exec.exec_id), - files=[], + asset=get_default_asset(resource), ) copy_assets_and_links(resource, target=new_resource) new_resource.refresh_from_db() diff --git a/importer/tests/end2end/test_end2end.py b/importer/tests/end2end/test_end2end.py index a042e270..f9ba8759 100644 --- a/importer/tests/end2end/test_end2end.py +++ b/importer/tests/end2end/test_end2end.py @@ -190,6 +190,7 @@ def test_import_geopackage_with_no_crs_table(self): @mock.patch( "importer.handlers.common.vector.BaseVectorFileHandler._select_valid_layers" ) + @override_settings(MEDIA_ROOT="/tmp/", ASSET_ROOT="/tmp/") def test_import_geopackage_with_no_crs_table_should_raise_error_if_all_layer_are_invalid( self, _select_valid_layers ): @@ -206,7 +207,7 @@ def test_import_geopackage_with_no_crs_table_should_raise_error_if_all_layer_are self.client.force_login(self.admin) response = self.client.post(self.url, data=payload) - self.assertEqual(500, response.status_code) + self.assertEqual(400, response.status_code) self.assertIn( "No valid layers found", diff --git a/importer/tests/unit/test_task.py b/importer/tests/unit/test_task.py index 
9b49b483..2bacc12c 100644 --- a/importer/tests/unit/test_task.py +++ b/importer/tests/unit/test_task.py @@ -24,6 +24,7 @@ from geonode.resource.enumerator import ExecutionRequestAction from geonode.base.models import ResourceBase from geonode.base.populate_test_data import create_single_dataset +from geonode.assets.handlers import asset_handler_registry from dynamic_models.models import ModelSchema, FieldSchema from dynamic_models.exceptions import DynamicModelError, InvalidFieldNameError from importer.models import ResourceHandlerInfo @@ -40,7 +41,19 @@ class TestCeleryTasks(ImporterBaseTestSupport): def setUp(self): self.user = get_user_model().objects.first() + self.existing_file = f"{project_dir}/tests/fixture/valid.gpkg" + self.asset_handler = asset_handler_registry.get_default_handler() + + self.asset = self.asset_handler.create( + title="Original", + owner=self.user, + description=None, + type="importer.handlers.gpkg.handler.GPKGFileHandler", + files=[self.existing_file], + clone_files=False, + ) + self.exec_id = orchestrator.create_execution_request( user=get_user_model().objects.get(username=self.user), func_name="dummy_func", @@ -50,6 +63,8 @@ def setUp(self): "files": {"base_file": self.existing_file}, # "overwrite_existing_layer": True, "store_spatial_files": True, + "asset_id": self.asset.id, + "asset_module_path": f"{self.asset.__module__}.{self.asset.__class__.__name__}", }, ) @@ -506,9 +521,6 @@ def test_import_metadata_should_work_as_expected(self): layer.refresh_from_db() self.assertEqual(layer.title, "test_dataset") - # verify that the original has been deleted - self.assertFalse(os.path.exists(xml_in_tmp)) - class TestDynamicModelSchema(TransactionImporterBaseTestSupport): databases = ("default", "datastore") diff --git a/runtest.sh b/runtest.sh index 0150efab..8bbfbf35 100755 --- a/runtest.sh +++ b/runtest.sh @@ -2,4 +2,4 @@ set -a . ./.env_test set +a -coverage run --append --source='.' /usr/src/geonode/manage.py test importer -v2 --noinput +coverage run --append --source='.' /usr/src/geonode/manage.py test importer.tests.end2end.test_end2end.ImporterNoCRSImportTest.test_import_geopackage_with_no_crs_table_should_raise_error_if_all_layer_are_invalid -v2 --noinput From b2de28a9fc51f8db419752d3ed1637f36d6f517c Mon Sep 17 00:00:00 2001 From: Mattia Date: Mon, 27 May 2024 14:59:39 +0200 Subject: [PATCH 11/13] Fix test coverage --- runtest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtest.sh b/runtest.sh index 8bbfbf35..0150efab 100755 --- a/runtest.sh +++ b/runtest.sh @@ -2,4 +2,4 @@ set -a . ./.env_test set +a -coverage run --append --source='.' /usr/src/geonode/manage.py test importer.tests.end2end.test_end2end.ImporterNoCRSImportTest.test_import_geopackage_with_no_crs_table_should_raise_error_if_all_layer_are_invalid -v2 --noinput +coverage run --append --source='.' 
/usr/src/geonode/manage.py test importer -v2 --noinput From 647241bc7a3c41fb841416b8658095a011fcacc8 Mon Sep 17 00:00:00 2001 From: Mattia Date: Mon, 27 May 2024 15:07:29 +0200 Subject: [PATCH 12/13] Fix test coverage --- importer/tests/end2end/test_end2end.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/importer/tests/end2end/test_end2end.py b/importer/tests/end2end/test_end2end.py index f9ba8759..dd8de7ab 100644 --- a/importer/tests/end2end/test_end2end.py +++ b/importer/tests/end2end/test_end2end.py @@ -207,7 +207,7 @@ def test_import_geopackage_with_no_crs_table_should_raise_error_if_all_layer_are self.client.force_login(self.admin) response = self.client.post(self.url, data=payload) - self.assertEqual(400, response.status_code) + self.assertEqual(500, response.status_code) self.assertIn( "No valid layers found", From 6840ec770d1278e83d3f14789245029cf436481a Mon Sep 17 00:00:00 2001 From: Mattia Date: Fri, 7 Jun 2024 12:21:37 +0200 Subject: [PATCH 13/13] fix api --- importer/api/views.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/importer/api/views.py b/importer/api/views.py index d3f34968..6c13157b 100644 --- a/importer/api/views.py +++ b/importer/api/views.py @@ -174,10 +174,13 @@ def create(self, request, *args, **kwargs): except Exception as e: # in case of any exception, is better to delete the # cloned files to keep the storage under control - if storage_manager is not None: - storage_manager.delete_retrieved_paths(force=True) if asset: - asset.delete() + try: + asset.delete() + except Exception as _exc: + logger.warning(_exc) + elif storage_manager is not None: + storage_manager.delete_retrieved_paths(force=True) if execution_id: orchestrator.set_as_failed(execution_id=str(execution_id), reason=e) logger.exception(e)
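Reviewer note (not part of the patch series): a minimal sketch of how the celery layer consumes the two new input_params once the asset exists, following the create_geonode_resource changes in importer/celery_tasks.py from PATCH 03. Here _exec and handler are assumed to be the execution request and the handler instance the task has already loaded.

    from django.utils.module_loading import import_string

    def sketch_resolve_asset(_exec, handler, layer_name, alternate, execution_id):
        # re-load the asset created by the API view from its module path and id
        _asset = (
            import_string(_exec.input_params.get("asset_module_path"))
            .objects.filter(id=_exec.input_params.get("asset_id"))
            .first()
        )

        # handlers now receive the asset instead of the raw file paths
        if _exec.input_params.get("overwrite_existing_layer"):
            return handler.overwrite_geonode_resource(
                layer_name=layer_name,
                alternate=alternate,
                execution_id=execution_id,
                asset=_asset,
            )
        return handler.create_geonode_resource(
            layer_name=layer_name,
            alternate=alternate,
            execution_id=execution_id,
            asset=_asset,
        )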