diff --git a/Dockerfile b/Dockerfile index fabb7a98..7305bdb5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ FROM geonode/geonode-base:latest-ubuntu-22.04 - +RUN rm -rf /usr/src/geonode RUN git clone https://github.com/GeoNode/geonode.git /usr/src/geonode RUN mkdir -p /usr/src/importer diff --git a/importer/api/serializer.py b/importer/api/serializer.py index 74874ed7..16ae3d6b 100644 --- a/importer/api/serializer.py +++ b/importer/api/serializer.py @@ -15,7 +15,7 @@ class Meta: "store_spatial_files", "overwrite_existing_layer", "skip_existing_layers", - "source" + "source", ) base_file = serializers.FileField() @@ -24,4 +24,4 @@ class Meta: store_spatial_files = serializers.BooleanField(required=False, default=True) overwrite_existing_layer = serializers.BooleanField(required=False, default=False) skip_existing_layers = serializers.BooleanField(required=False, default=False) - source = serializers.CharField(required=False, default='upload') + source = serializers.CharField(required=False, default="upload") diff --git a/importer/api/views.py b/importer/api/views.py index 5ee72919..0d71b5eb 100644 --- a/importer/api/views.py +++ b/importer/api/views.py @@ -148,7 +148,7 @@ def create(self, request, *args, **kwargs): legacy_upload_name=_file.name, action=action, name=_file.name, - source=extracted_params.get('source'), + source=extracted_params.get("source"), ) sig = import_orchestrator.s( diff --git a/importer/handlers/common/metadata.py b/importer/handlers/common/metadata.py index 0067d8b0..5b303577 100644 --- a/importer/handlers/common/metadata.py +++ b/importer/handlers/common/metadata.py @@ -56,7 +56,7 @@ def extract_params_from_data(_data, action=None): @staticmethod def perform_last_step(execution_id): _exec = orchestrator.get_execution_object(execution_id) - + _exec.output_params.update( **{ "detail_url": [ @@ -81,7 +81,7 @@ def import_resource(self, files: dict, execution_id: str, **kwargs): original_handler = orchestrator.load_handler( dataset.resourcehandlerinfo_set.first().handler_module_path )() - + ResourceHandlerInfo.objects.create( handler_module_path=dataset.resourcehandlerinfo_set.first().handler_module_path, resource=dataset, diff --git a/importer/handlers/common/tests_vector.py b/importer/handlers/common/tests_vector.py index bdfa13d7..a33c5b9b 100644 --- a/importer/handlers/common/tests_vector.py +++ b/importer/handlers/common/tests_vector.py @@ -7,6 +7,7 @@ from importer.handlers.common.vector import BaseVectorFileHandler, import_with_ogr2ogr from django.contrib.auth import get_user_model from importer import project_dir +from importer.handlers.gpkg.handler import GPKGFileHandler from importer.orchestrator import orchestrator from geonode.base.populate_test_data import create_single_dataset from geonode.resource.models import ExecutionRequest @@ -23,6 +24,7 @@ def setUpClass(cls): cls.handler = BaseVectorFileHandler() cls.valid_gpkg = f"{project_dir}/tests/fixture/valid.gpkg" cls.invalid_gpkg = f"{project_dir}/tests/fixture/invalid.gpkg" + cls.no_crs_gpkg = f"{project_dir}/tests/fixture/noCrsTable.gpkg" cls.user, _ = get_user_model().objects.get_or_create(username="admin") cls.invalid_files = {"base_file": cls.invalid_gpkg} cls.valid_files = {"base_file": cls.valid_gpkg} @@ -156,9 +158,13 @@ def test_import_resource_should_not_be_imported(self, celery_chord, ogr2ogr_driv input_params={"files": self.valid_files, "skip_existing_layer": True}, ) - # start the resource import - self.handler.import_resource( - files=self.valid_files, execution_id=str(exec_id) + with self.assertRaises(Exception) as exception: + # start the resource import + self.handler.import_resource( + files=self.valid_files, execution_id=str(exec_id) + ) + self.assertIn( + "No valid layers found", exception.exception.args[0], 'No valid layers found.' ) celery_chord.assert_not_called() @@ -296,3 +302,20 @@ def test_import_with_ogr2ogr_without_errors_should_call_the_right_command_if_dum self.assertTrue("-f PGDump /vsistdout/" in _call_as_string) self.assertTrue("psql -d" in _call_as_string) self.assertFalse("-f PostgreSQL PG" in _call_as_string) + + def test_select_valid_layers(self): + """ + The function should return only the datasets with a geometry + The other one are discarded + """ + all_layers = GPKGFileHandler().get_ogr2ogr_driver().Open(self.no_crs_gpkg) + + with self.assertLogs(level="ERROR") as _log: + valid_layer = GPKGFileHandler()._select_valid_layers(all_layers) + + self.assertIn( + "The following layer layer_styles does not have a Coordinate Reference System (CRS) and will be skipped.", + [x.message for x in _log.records], + ) + self.assertEqual(1, len(valid_layer)) + self.assertEqual("mattia_test", valid_layer[0].GetName()) diff --git a/importer/handlers/common/vector.py b/importer/handlers/common/vector.py index fb198491..c6626596 100644 --- a/importer/handlers/common/vector.py +++ b/importer/handlers/common/vector.py @@ -305,7 +305,8 @@ def import_resource(self, files: dict, execution_id: str, **kwargs) -> str: Internally will call the steps required to import the data inside the geonode_data database """ - layers = self.get_ogr2ogr_driver().Open(files.get("base_file")) + all_layers = self.get_ogr2ogr_driver().Open(files.get("base_file")) + layers = self._select_valid_layers(all_layers) # for the moment we skip the dyanamic model creation layer_count = len(layers) logger.info(f"Total number of layers available: {layer_count}") @@ -317,7 +318,11 @@ def import_resource(self, files: dict, execution_id: str, **kwargs) -> str: dynamic_model = None celery_group = None try: + if len(layers) == 0: + raise Exception("No valid layers found") + # start looping on the layers available + for index, layer in enumerate(layers, start=1): layer_name = self.fixup_name(layer.GetName()) @@ -397,6 +402,20 @@ def import_resource(self, files: dict, execution_id: str, **kwargs) -> str: raise e return + def _select_valid_layers(self, all_layers): + layers = [] + for layer in all_layers: + try: + self.identify_authority(layer) + layers.append(layer) + except Exception as e: + logger.error(e) + logger.error( + f"The following layer {layer.GetName()} does not have a Coordinate Reference System (CRS) and will be skipped." + ) + pass + return layers + def find_alternate_by_dataset(self, _exec_obj, layer_name, should_be_overwritten): workspace = DataPublisher(None).workspace dataset_available = Dataset.objects.filter( diff --git a/importer/handlers/shapefile/serializer.py b/importer/handlers/shapefile/serializer.py index bdda714a..61c2d593 100644 --- a/importer/handlers/shapefile/serializer.py +++ b/importer/handlers/shapefile/serializer.py @@ -18,7 +18,7 @@ class Meta: "store_spatial_files", "overwrite_existing_layer", "skip_existing_layers", - "source" + "source", ) base_file = serializers.FileField() @@ -30,4 +30,4 @@ class Meta: store_spatial_files = serializers.BooleanField(required=False, default=True) overwrite_existing_layer = serializers.BooleanField(required=False, default=False) skip_existing_layers = serializers.BooleanField(required=False, default=False) - source = serializers.CharField(required=False, default='upload') + source = serializers.CharField(required=False, default="upload") diff --git a/importer/handlers/xml/serializer.py b/importer/handlers/xml/serializer.py index adb890de..6cc81c51 100644 --- a/importer/handlers/xml/serializer.py +++ b/importer/handlers/xml/serializer.py @@ -12,4 +12,4 @@ class Meta: base_file = serializers.FileField() dataset_title = serializers.CharField(required=True) - source = serializers.CharField(required=False, default='resource_file_upload') + source = serializers.CharField(required=False, default="resource_file_upload") diff --git a/importer/tests/end2end/test_end2end.py b/importer/tests/end2end/test_end2end.py index d9b03d2c..5c9043a6 100644 --- a/importer/tests/end2end/test_end2end.py +++ b/importer/tests/end2end/test_end2end.py @@ -26,6 +26,7 @@ def setUpClass(cls) -> None: super().setUpClass() cls.valid_gkpg = f"{project_dir}/tests/fixture/valid.gpkg" cls.valid_geojson = f"{project_dir}/tests/fixture/valid.geojson" + cls.no_crs_gpkg = f"{project_dir}/tests/fixture/noCrsTable.gpkg" file_path = gisdata.VECTOR_DATA filename = os.path.join(file_path, "san_andres_y_providencia_highway.shp") cls.valid_shp = { @@ -101,8 +102,8 @@ def _assertimport(self, payload, initial_name, overwrite=False, last_update=None ) self.assertTrue(dataset.exists()) - # check if the resource is in geoserver resources = self.cat.get_resources() + # check if the resource is in geoserver self.assertTrue(dataset.first().title in [y.name for y in resources]) if overwrite: self.assertTrue(dataset.first().last_updated > last_update) @@ -154,6 +155,66 @@ def test_import_gpkg_overwrite(self): self.cat.delete(layer) +class ImporterNoCRSImportTest(BaseImporterEndToEndTest): + @override_settings(ASYNC_SIGNALS=False) + @mock.patch.dict(os.environ, {"GEONODE_GEODATABASE": "test_geonode_data"}) + @override_settings( + GEODATABASE_URL=f"{geourl.split('/geonode_data')[0]}/test_geonode_data" + ) + def test_import_geopackage_with_no_crs_table(self): + layer = self.cat.get_layer("geonode:mattia_test") + if layer: + self.cat.delete(layer) + payload = { + "base_file": open(self.no_crs_gpkg, "rb"), + } + initial_name = "mattia_test" + with self.assertLogs(level="ERROR") as _log: + self._assertimport(payload, initial_name) + + self.assertIn( + "The following layer layer_styles does not have a Coordinate Reference System (CRS) and will be skipped.", + [x.message for x in _log.records], + ) + layer = self.cat.get_layer("geonode:mattia_test") + if layer: + self.cat.delete(layer) + + @override_settings(ASYNC_SIGNALS=False) + @mock.patch.dict(os.environ, {"GEONODE_GEODATABASE": "test_geonode_data"}) + @override_settings( + GEODATABASE_URL=f"{geourl.split('/geonode_data')[0]}/test_geonode_data" + ) + @mock.patch( + "importer.handlers.common.vector.BaseVectorFileHandler._select_valid_layers" + ) + def test_import_geopackage_with_no_crs_table_should_raise_error_if_all_layer_are_invalid( + self, _select_valid_layers + ): + _select_valid_layers.return_value = [] + layer = self.cat.get_layer("geonode:mattia_test") + if layer: + self.cat.delete(layer) + + payload = { + "base_file": open(self.no_crs_gpkg, "rb"), + } + + with self.assertLogs(level="ERROR") as _log: + self.client.force_login(self.admin) + + response = self.client.post(self.url, data=payload) + self.assertEqual(500, response.status_code) + + self.assertIn( + "No valid layers found", + [x.message for x in _log.records], + ) + layer = self.cat.get_layer("geonode:mattia_test") + if layer: + self.cat.delete(layer) + + class ImporterGeoJsonImportTest(BaseImporterEndToEndTest): @mock.patch.dict(os.environ, {"GEONODE_GEODATABASE": "test_geonode_data"}) @override_settings( diff --git a/importer/tests/fixture/noCrsTable.gpkg b/importer/tests/fixture/noCrsTable.gpkg new file mode 100755 index 00000000..9cd27808 Binary files /dev/null and b/importer/tests/fixture/noCrsTable.gpkg differ diff --git a/runtest.sh b/runtest.sh index bb8b89d2..0150efab 100755 --- a/runtest.sh +++ b/runtest.sh @@ -2,4 +2,4 @@ set -a . ./.env_test set +a -coverage run --append --source='.' /usr/src/geonode/manage.py test importer -v2 --noinput \ No newline at end of file +coverage run --append --source='.' /usr/src/geonode/manage.py test importer -v2 --noinput