Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Assets data retriever #244

Merged
merged 16 commits into from
Jun 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .env_test
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,9 @@ DEBUG=False

SECRET_KEY='myv-y4#7j-d*p-__@j#*3z@!y24fz8%^z2v6atuy4bo9vqr1_a'

STATIC_ROOT=/mnt/volumes/statics/static/
MEDIA_ROOT=/mnt/volumes/statics/uploaded/
STATIC_ROOT=/tmp/statics/static/
MEDIA_ROOT=/tmp/statics/uploaded/
ASSET_ROOT=/tmp/statics/assets/
GEOIP_PATH=/mnt/volumes/statics/geoip.db

CACHE_BUSTING_STATIC_ENABLED=False
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM geonode/geonode-base:latest-ubuntu-22.04
RUN rm -rf /usr/src/geonode
RUN git clone https://github.com/GeoNode/geonode.git /usr/src/geonode
RUN cd /usr/src/geonode && git checkout 12124_assets && cd -
RUN cd /usr/src/geonode && git fetch --all && git checkout 12124_assets_20240523 && cd -
RUN mkdir -p /usr/src/importer

RUN cd ..
Expand Down
2 changes: 1 addition & 1 deletion importer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

project_dir = os.path.dirname(os.path.abspath(__file__))

VERSION = (1, 0, 10)
VERSION = (1, 1, 0)
__version__ = ".".join([str(i) for i in VERSION])
__author__ = "geosolutions-it"
__email__ = "[email protected]"
Expand Down
51 changes: 51 additions & 0 deletions importer/api/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@

from importer.models import ResourceHandlerInfo
from importer.tests.utils import ImporterBaseTestSupport
from importer.orchestrator import orchestrator
from django.utils.module_loading import import_string
from geonode.assets.models import LocalAsset


class TestImporterViewSet(ImporterBaseTestSupport):
Expand Down Expand Up @@ -153,3 +156,51 @@ def test_copy_ther_resource_if_file_handler_is_set(self, _orc):

self.assertEqual(200, response.status_code)
_orc.s.assert_called_once()

@patch("importer.api.views.import_orchestrator")
def test_asset_is_created_before_the_import_start(self, patch_upload):
patch_upload.apply_async.side_effect = MagicMock()

self.client.force_login(get_user_model().objects.get(username="admin"))
payload = {
"base_file": SimpleUploadedFile(
name="test.geojson", content=b"some-content"
),
"store_spatial_files": True,
}

response = self.client.post(self.url, data=payload)

self.assertEqual(201, response.status_code)

self.assertTrue(201, response.status_code)

_exec = orchestrator.get_execution_object(response.json()["execution_id"])

asset_handler = import_string(_exec.input_params["asset_module_path"])
self.assertTrue(asset_handler.objects.filter(id=_exec.input_params["asset_id"]))

asset_handler.objects.filter(id=_exec.input_params["asset_id"]).delete()

@patch("importer.api.views.import_orchestrator")
@patch(
"importer.api.views.UploadLimitValidator.validate_parallelism_limit_per_user"
)
def test_asset_should_be_deleted_if_created_during_with_exception(
self, validate_parallelism_limit_per_user, patch_upload
):
patch_upload.apply_async.s.side_effect = MagicMock()
validate_parallelism_limit_per_user.side_effect = Exception("random exception")

self.client.force_login(get_user_model().objects.get(username="admin"))
payload = {
"base_file": SimpleUploadedFile(
name="test.geojson", content=b"some-content"
),
"store_spatial_files": True,
}

response = self.client.post(self.url, data=payload)

self.assertEqual(500, response.status_code)
self.assertFalse(LocalAsset.objects.exists())
42 changes: 38 additions & 4 deletions importer/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
from rest_framework.parsers import FileUploadParser, MultiPartParser
from rest_framework.permissions import IsAuthenticatedOrReadOnly
from rest_framework.response import Response
from geonode.assets.handlers import asset_handler_registry
from geonode.assets.local import LocalAssetHandler

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -91,6 +93,8 @@ def create(self, request, *args, **kwargs):
"""
_file = request.FILES.get("base_file") or request.data.get("base_file")
execution_id = None
asset_handler = LocalAssetHandler()
asset_dir = asset_handler._create_asset_dir()

serializer = self.get_serializer_class()
data = serializer(data=request.data)
Expand All @@ -111,23 +115,30 @@ def create(self, request, *args, **kwargs):
remote_files={"base_file": _data.get("zip_file", _data.get("kmz_file"))}
)
# cloning and unzip the base_file
storage_manager.clone_remote_files()
storage_manager.clone_remote_files(
cloning_directory=asset_dir, create_tempdir=False
)
# update the payload with the unziped paths
_data.update(storage_manager.get_retrieved_paths())

handler = orchestrator.get_handler(_data)

if _file and handler:
asset = None
try:
# cloning data into a local folder
extracted_params, _data = handler.extract_params_from_data(_data)
if storage_manager is None:
# means that the storage manager is not initialized yet, so
# the file is not a zip
storage_manager = StorageManager(remote_files=_data)
storage_manager.clone_remote_files()
storage_manager.clone_remote_files(
cloning_directory=asset_dir, create_tempdir=False
)
# get filepath
files = storage_manager.get_retrieved_paths()
asset, files = self.generate_asset_and_retrieve_paths(
request, storage_manager, handler
)

upload_validator = UploadLimitValidator(request.user)
upload_validator.validate_parallelism_limit_per_user()
Expand All @@ -144,6 +155,10 @@ def create(self, request, *args, **kwargs):
input_params={
**{"files": files, "handler_module_path": str(handler)},
**extracted_params,
**{
"asset_id": asset.id,
"asset_module_path": f"{asset.__module__}.{asset.__class__.__name__}",
},
},
legacy_upload_name=_file.name,
action=action,
Expand All @@ -159,7 +174,12 @@ def create(self, request, *args, **kwargs):
except Exception as e:
# in case of any exception, is better to delete the
# cloned files to keep the storage under control
if storage_manager is not None:
if asset:
try:
asset.delete()
except Exception as _exc:
logger.warning(_exc)
elif storage_manager is not None:
storage_manager.delete_retrieved_paths(force=True)
if execution_id:
orchestrator.set_as_failed(execution_id=str(execution_id), reason=e)
Expand All @@ -168,6 +188,20 @@ def create(self, request, *args, **kwargs):

raise ImportException(detail="No handlers found for this dataset type")

def generate_asset_and_retrieve_paths(self, request, storage_manager, handler):
asset_handler = asset_handler_registry.get_default_handler()
_files = storage_manager.get_retrieved_paths()
asset = asset_handler.create(
title="Original",
owner=request.user,
description=None,
type=str(handler),
files=list(set(_files.values())),
clone_files=False,
)

return asset, _files


class ResourceImporter(DynamicModelViewSet):
authentication_classes = [
Expand Down
10 changes: 8 additions & 2 deletions importer/celery_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,12 @@ def create_geonode_resource(

_files = _exec.input_params.get("files")

_asset = (
import_string(_exec.input_params.get("asset_module_path"))
.objects.filter(id=_exec.input_params.get("asset_id"))
.first()
)

handler = import_string(handler_module_path)()
_overwrite = _exec.input_params.get("overwrite_existing_layer")

Expand All @@ -337,14 +343,14 @@ def create_geonode_resource(
layer_name=layer_name,
alternate=alternate,
execution_id=execution_id,
files=_files,
asset=_asset,
)
else:
resource = handler.create_geonode_resource(
layer_name=layer_name,
alternate=alternate,
execution_id=execution_id,
files=_files,
asset=_asset,
)

if _overwrite:
Expand Down
2 changes: 1 addition & 1 deletion importer/handlers/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ class BaseVectorFileHandler(BaseHandler):
return

def overwrite_geonode_resource(
self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = Dataset, files=None
self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = Dataset, asset=None
):
"""
Base function to override the resource into geonode. Each handler can specify
Expand Down
7 changes: 0 additions & 7 deletions importer/handlers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from importer.utils import ImporterRequestAction as ira
from django_celery_results.models import TaskResult
from django.db.models import Q
from geonode.storage.manager import storage_manager

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -150,12 +149,6 @@ def perform_last_step(execution_id):
]
_exec.output_params.update({"resources": resource_output_params})
_exec.save()

# since the original file is now available as asset, we can delete the input files
# TODO must be improved. The asset should be created in the beginning
for _file in _exec.input_params.get("files", {}).values():
if storage_manager.exists(_file):
storage_manager.delete(_file)

return _exec

Expand Down
1 change: 0 additions & 1 deletion importer/handlers/common/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from importer.orchestrator import orchestrator
from django.shortcuts import get_object_or_404
from geonode.layers.models import Dataset
from geonode.storage.manager import storage_manager

logger = logging.getLogger(__name__)

Expand Down
22 changes: 7 additions & 15 deletions importer/handlers/common/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ def create_geonode_resource(
alternate: str,
execution_id: str,
resource_type: Dataset = Dataset,
files=None,
asset=None,
):
"""
Base function to create the resource into geonode. Each handler can specify
Expand All @@ -335,6 +335,7 @@ def create_geonode_resource(
logger.warning(
f"The dataset required {alternate} does not exists, but an overwrite is required, the resource will be created"
)

saved_dataset = resource_manager.create(
None,
resource_type=resource_type,
Expand All @@ -346,16 +347,7 @@ def create_geonode_resource(
dirty_state=True,
title=layer_name,
owner=_exec.user,
extension=self.supported_file_extension_config["id"],
data_title="Original",
data_type=self.supported_file_extension_config["label"],
link_type="uploaded", # should be in geonode.base.enumerations.LINK_TYPES
files=list(
set(
list(_exec.input_params.get("files", {}).values())
or list(files)
)
),
asset=asset,
),
)

Expand All @@ -377,7 +369,7 @@ def overwrite_geonode_resource(
alternate: str,
execution_id: str,
resource_type: Dataset = Dataset,
files=None,
asset=None,
):
dataset = resource_type.objects.filter(alternate__icontains=alternate)

Expand Down Expand Up @@ -405,7 +397,7 @@ def overwrite_geonode_resource(
f"The dataset required {alternate} does not exists, but an overwrite is required, the resource will be created"
)
return self.create_geonode_resource(
layer_name, alternate, execution_id, resource_type, files
layer_name, alternate, execution_id, resource_type, asset
)
elif not dataset.exists() and not _overwrite:
logger.warning(
Expand Down Expand Up @@ -487,9 +479,9 @@ def copy_geonode_resource(
layer_name=data_to_update.get("title"),
alternate=new_alternate,
execution_id=str(_exec.exec_id),
files=kwargs.get("kwargs", {})
asset=kwargs.get("kwargs", {})
.get("new_file_location", {})
.get("files", []),
.get("asset", []),
)
resource.refresh_from_db()
return resource
Expand Down
2 changes: 1 addition & 1 deletion importer/handlers/common/tests_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ def test_select_valid_layers(self):
self.assertEqual(1, len(valid_layer))
self.assertEqual("mattia_test", valid_layer[0].GetName())

@override_settings(MEDIA_ROOT='/tmp')
@override_settings(MEDIA_ROOT="/tmp")
def test_perform_last_step(self):
"""
Output params in perform_last_step should return the detail_url and the ID
Expand Down
Loading