Skip to content

Commit

Permalink
[Fixes GeoNode#7945] Ingest harvested layer data to geonode
Browse files Browse the repository at this point in the history
  • Loading branch information
meomancer committed Aug 13, 2021
1 parent cecc88c commit d360ab9
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 1 deletion.
32 changes: 32 additions & 0 deletions geonode/harvesting/harvesters/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,15 @@
import logging
import typing
import urllib.parse
import os
import zipfile

import requests
from django.core.files import uploadedfile
from django.db.models import F
from django.utils import timezone
from geonode.base.models import ResourceBase
from geonode.layers.models import Dataset
from geonode.resource.manager import resource_manager
from geonode.storage.manager import storage_manager

Expand Down Expand Up @@ -181,6 +184,18 @@ def update_geonode_resource(
defaults = self.get_geonode_resource_defaults(
harvested_info, harvestable_resource)
geonode_resource = harvestable_resource.geonode_resource
if geonode_resource is None:
geonode_resource_type = self.get_geonode_resource_type(
harvestable_resource.remote_resource_type)
if geonode_resource_type == Dataset:
if len(harvested_info.copied_resources) > 0:
geonode_resource = resource_manager.ingest(
harvested_info.copied_resources,
uuid=str(harvested_info.resource_descriptor.uuid),
resource_type=self.get_geonode_resource_type(
harvestable_resource.remote_resource_type),
defaults=defaults)

if geonode_resource is None:
geonode_resource = resource_manager.create(
str(harvested_info.resource_descriptor.uuid),
Expand Down Expand Up @@ -375,6 +390,23 @@ def download_resource_file(url: str, target_name: str) -> str:
return result


def unzip_file(file_name: str) -> list:
    """Extract a stored zip archive and return the paths of its top-level files.

    The archive is extracted into a sibling directory named
    ``<file_name>_folder``, created next to the archive on disk.

    :param file_name: name of the archive, as understood by ``storage_manager``
    :return: a list of ``<file_name>_folder/<extracted file>`` names, one per
        file found directly inside the extraction directory. The list is empty
        when ``file_name`` is not a valid zip archive.
    """
    filepath = storage_manager.path(file_name)
    folder = filepath + '_folder'
    folder_name = file_name + '_folder'
    try:
        with zipfile.ZipFile(filepath) as zipdata:
            # exist_ok: the same resource may be harvested more than once, in
            # which case the extraction directory is already there. Without it
            # a second run would crash with FileExistsError.
            os.makedirs(folder, exist_ok=True)
            zipdata.extractall(folder)
    except zipfile.BadZipFile:
        # Not every copied resource is a zip archive, so this is an expected,
        # non-fatal outcome. Record it instead of swallowing it silently.
        logging.getLogger(__name__).debug(
            "%r is not a valid zip archive, nothing to extract", file_name)
        return []
    # Index 1 of the listdir() result is used as the list of file names
    # (presumably the (directories, files) convention of Django's
    # Storage.listdir -- confirm against storage_manager). Files nested in
    # sub-directories of the archive are therefore not reported.
    return [
        folder_name + '/' + filename
        for filename in storage_manager.listdir(folder)[1]
    ]


def _sanitize_file_name(file_name: str) -> typing.Optional[str]:
"""Inspired by django's `django.http.multipartparser.MultiPartParser.sanitize_file_name()` method."""

Expand Down
16 changes: 15 additions & 1 deletion geonode/harvesting/harvesters/geonode.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def __init__(
*args,
harvest_documents: typing.Optional[bool] = True,
harvest_datasets: typing.Optional[bool] = True,
copy_datasets: typing.Optional[bool] = False,
harvest_maps: typing.Optional[bool] = True,
copy_documents: typing.Optional[bool] = False,
resource_title_filter: typing.Optional[str] = None,
Expand All @@ -86,6 +87,7 @@ def __init__(
self.harvest_documents = (
harvest_documents if harvest_documents is not None else True)
self.harvest_datasets = harvest_datasets if harvest_datasets is not None else True
self.copy_datasets = copy_datasets
self.harvest_maps = harvest_maps if harvest_maps is not None else True
self.copy_documents = copy_documents
self.resource_title_filter = resource_title_filter
Expand All @@ -107,6 +109,8 @@ def from_django_record(cls, record: models.Harvester):
"harvest_documents", True),
harvest_datasets=record.harvester_type_specific_configuration.get(
"harvest_datasets", True),
copy_datasets=record.harvester_type_specific_configuration.get(
"copy_datasets", False),
harvest_maps=record.harvester_type_specific_configuration.get(
"harvest_maps", True),
copy_documents=record.harvester_type_specific_configuration.get(
Expand Down Expand Up @@ -140,6 +144,10 @@ def get_extra_config_schema(cls) -> typing.Dict:
"type": "boolean",
"default": True
},
"copy_datasets": {
"type": "boolean",
"default": False
},
"harvest_maps": {
"type": "boolean",
"default": True
Expand Down Expand Up @@ -263,7 +271,7 @@ def should_copy_resource(
) -> bool:
return {
GeoNodeResourceType.DOCUMENT.value: self.copy_documents,
GeoNodeResourceType.DATASET.value: False,
GeoNodeResourceType.DATASET.value: self.copy_datasets,
GeoNodeResourceType.MAP.value: False,
}[harvestable_resource.remote_resource_type]

Expand Down Expand Up @@ -571,6 +579,12 @@ def get_distribution_info(
"bbox": f"{min_x},{min_y},{max_x},{max_y}"
}
original = f"{wcs}?{urllib.parse.urlencode(query_params)}"
else:
try:
original = [record_link.get('url') for record_link in api_record.get("links", []) if record_link.get('name') == 'Zipped Shapefile'][0]
except IndexError:
pass

return resourcedescriptor.RecordDistribution(
link_url=link,
wms_url=wms,
Expand Down
4 changes: 4 additions & 0 deletions geonode/harvesting/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ def _harvest_resource(
copied_name = worker.copy_resource(harvestable_resource, harvested_resource_info)
if copied_name is not None:
harvested_resource_info.copied_resources.append(copied_name)
files = base.unzip_file(copied_name)
for filename in files:
harvested_resource_info.copied_resources.append(filename)

now_ = now()
if harvested_resource_info is not None:
worker.update_geonode_resource(
Expand Down

0 comments on commit d360ab9

Please sign in to comment.