From f488e172bf8f5d5bd8cf4629867a1c7a419ab797 Mon Sep 17 00:00:00 2001
From: mattiagiupponi
Date: Wed, 19 May 2021 16:59:18 +0200
Subject: [PATCH] [Fixes #174] First test of importlayers refactoring

---
 .../management/commands/importlayers.py       | 366 ++++++++----------
 requirements.txt                              |   2 +-
 2 files changed, 173 insertions(+), 195 deletions(-)

diff --git a/geonode/layers/management/commands/importlayers.py b/geonode/layers/management/commands/importlayers.py
index c4bbdf2c609..874eae8ddc1 100644
--- a/geonode/layers/management/commands/importlayers.py
+++ b/geonode/layers/management/commands/importlayers.py
@@ -18,13 +18,154 @@
 #
 #########################################################################
 
+from contextlib import ExitStack
+import os
 from django.utils import timezone
-from django.core.management.base import BaseCommand
+from django.core.management.base import BaseCommand, CommandError
+import requests
+from requests.auth import HTTPBasicAuth
 from geonode.layers.utils import upload
 from geonode.people.utils import get_valid_user
 import traceback
 import datetime
 
+
+class GeoNodeUploader():
+    """Upload the shapefiles and GeoTIFFs found in a folder to a GeoNode host.
+
+    Each supported file is sent to ``/api/v2/uploads/upload/`` and then
+    finalized through the upload detail and ``/upload/final`` pages. All
+    requests are authenticated with the given username and password.
+    """
+
+    # Upload form fields of the optional companion files of a shapefile; the
+    # text before the underscore is the companion file extension.
+    spatial_files = ("dbf_file", "shx_file", "prj_file")
+
+    def __init__(
+        self,
+        host: str,
+        folder_path: str,
+        username: str,
+        password: str,
+        call_delay: int = 10,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.host = host
+        self.folder_path = folder_path
+        self.username = username
+        self.password = password
+        # Kept for callers; the upload flow itself never waits on it.
+        self.call_delay = call_delay
+
+    def execute(self):
+        """Upload every ``.shp`` and ``.tif`` file found in ``folder_path``.
+
+        Files with any other extension are skipped. A rejected upload is
+        reported and does not stop the remaining ones.
+
+        Returns the number of layers added to GeoNode.
+        """
+        uploaded = 0
+        for file in os.listdir(self.folder_path):
+            _file = os.path.join(self.folder_path, file)
+            upload_paths = self._get_upload_paths(_file)
+            if upload_paths is None:
+                # Neither a shapefile nor a GeoTIFF.
+                continue
+            if not os.path.isfile(_file):
+                print(f"The selected file path does not exist: {file}")
+                continue
+
+            print(f"Starting upload for file: {_file}")
+            if self._upload_layer(file, upload_paths):
+                uploaded += 1
+        return uploaded
+
+    def _get_upload_paths(self, path):
+        """Map each upload form field to the file to send for ``path``.
+
+        Returns ``None`` when ``path`` is neither a shapefile nor a GeoTIFF.
+        """
+        base, ext = os.path.splitext(path)
+        ext = ext.lower()
+        if ext == ".tif":
+            return {"base_file": path, "tif_file": path}
+        if ext != ".shp":
+            return None
+
+        upload_paths = {"base_file": path}
+        for spatial_file in self.spatial_files:
+            file_path = f"{base}.{spatial_file.split('_')[0]}"
+            # sometimes a shapefile is missing an extra file,
+            # allow for that
+            if os.path.exists(file_path):
+                upload_paths[spatial_file] = file_path
+        return upload_paths
+
+    def _upload_layer(self, layer_title, upload_paths):
+        """Send one layer to GeoNode and finalize it; return True on success."""
+        params = {
+            # make public since wms client doesn't do authentication
+            "permissions": '{ "users": {"AnonymousUser": ["view_resourcebase"]} , "groups":{}}',  # to be decided
+            "time": "false",
+            "layer_title": layer_title,
+            "charset": "UTF-8",
+        }
+        upload_url = f"{self.host}/api/v2/uploads/upload/"
+
+        with requests.Session() as client:
+            # Authenticate every request with the credentials given to the
+            # command instead of a token baked into the source.
+            client.auth = HTTPBasicAuth(self.username, self.password)
+
+            # The stack closes every opened file, even when the request fails.
+            with ExitStack() as stack:
+                files = {}
+                for field, file_path in upload_paths.items():
+                    filename = os.path.basename(file_path)
+                    handle = stack.enter_context(open(file_path, "rb"))
+                    files[field] = (filename, handle)
+                    params[field] = filename
+
+                print(f"Sending PUT request to geonode: {upload_url}")
+                response = client.put(upload_url, data=params, files=files)
+
+            print(f"Geonode response with status code {response.status_code}")
+            if not response.ok:
+                print(f"Upload of {layer_title} failed: {response.text}")
+                return False
+
+            print("Getting import_id")
+            import_id = int(response.json()["redirect_to"].split("?id=")[1].split("&")[0])
+            print(f"ImportID found with ID: {import_id}")
+
+            print("Getting upload_list")
+            upload_response = client.get(f"{self.host}/api/v2/uploads/")
+            upload_id = self._get_upload_id(upload_response, import_id)
+            if upload_id is None:
+                print(f"No upload found for import {import_id}")
+                return False
+            print(f"UploadID found {upload_id}")
+
+            print("Calling upload detail page")
+            client.get(f"{self.host}/api/v2/uploads/{upload_id}")
+
+            print("Calling final upload page")
+            client.get(f"{self.host}/upload/final?id={import_id}")
+
+        print("Layer added in GeoNode")
+        return True
+
+    @staticmethod
+    def _get_upload_id(upload_response, import_id):
+        """Return the id of the upload matching ``import_id``, or ``None``."""
+        for item in upload_response.json()["uploads"]:
+            if item.get("import_id", None) == import_id:
+                return item.get("id", None)
+        return None
+
 
 class Command(BaseCommand):
     help = ("Brings a data file or a directory full of data files into a"
@@ -35,211 +176,48 @@ def add_arguments(self, parser):
         # Positional arguments
         parser.add_argument('path', nargs='*', help='path [path...]')
 
-        # Named (optional) arguments
         parser.add_argument(
-            '-u',
-            '--user',
-            dest="user",
-            default=None,
-            help="Name of the user account which should own the imported layers")
-
-        parser.add_argument(
-            '-i',
-            '--ignore-errors',
-            action='store_true',
-            dest='ignore_errors',
-            default=False,
-            help='Stop after any errors are encountered.')
-
-        parser.add_argument(
-            '-o',
-            '--overwrite',
-            dest='overwrite',
-            default=False,
-            action="store_true",
-            help="Overwrite existing layers if discovered (defaults False)")
-
-        parser.add_argument(
-            '-k',
-            '--keywords',
-            dest='keywords',
-            default="",
-            help=("The default keywords, separated by comma, for the imported"
-                  " layer(s). Will be the same for all imported layers"
-                  " if multiple imports are done in one command"))
-
-        parser.add_argument(
-            '-l',
-            '--license',
-            dest='license',
-            default=None,
-            help=("The license for the imported layer(s). Will be the same for"
-                  " all imported layers if multiple imports are done"
-                  " in one command"))
-
-        parser.add_argument(
-            '-c',
-            '--category',
-            dest='category',
-            default=None,
-            help=("The category for the imported layer(s). Will be the same"
-                  " for all imported layers if multiple imports are done"
-                  " in one command"))
-
-        parser.add_argument(
-            '-r',
-            '--regions',
-            dest='regions',
-            default="",
-            help=("The default regions, separated by comma, for the imported"
-                  " layer(s). Will be the same for all imported layers if"
-                  " multiple imports are done in one command"))
+            '-hh',
+            '--host',
+            dest='host',
+            help="Geonode host url")
 
         parser.add_argument(
-            '-n',
-            '--name',
-            dest='layername',
-            default=None,
-            help="The name for the imported layer(s). Can not be used with multiple imports")
-
-        parser.add_argument(
-            '-t',
-            '--title',
-            dest='title',
-            default=None,
-            help=("The title for the imported layer(s). Will be the same for"
-                  " all imported layers if multiple imports are done"
-                  " in one command"))
-
-        parser.add_argument(
-            '-a',
-            '--abstract',
-            dest='abstract',
-            default=None,
-            help=("The abstract for the imported layer(s). Will be the same for"
-                  "all imported layers if multiple imports are done"
-                  "in one command"))
-
-        parser.add_argument(
-            '-d',
-            '--date',
-            dest='date',
-            default=None,
-            help=('The date and time for the imported layer(s). Will be the '
-                  'same for all imported layers if multiple imports are done '
-                  'in one command. Use quotes to specify both the date and '
-                  'time in the format \'YYYY-MM-DD HH:MM:SS\'.'))
+            '-u',
+            '--username',
+            dest='username',
+            help="Geonode username")
 
         parser.add_argument(
             '-p',
-            '--private',
-            dest='private',
-            default=False,
-            action="store_true",
-            help="Make layer viewable only to owner")
-
-        parser.add_argument(
-            '-m',
-            '--metadata_uploaded_preserve',
-            dest='metadata_uploaded_preserve',
-            default=False,
-            action="store_true",
-            help="Force metadata XML to be preserved")
-
-        parser.add_argument(
-            '-C',
-            '--charset',
-            dest='charset',
-            default='UTF-8',
-            help=("Specify the charset of the data"))
+            '--password',
+            dest='password',
+            help="Geonode password")
 
     def handle(self, *args, **options):
-        verbosity = int(options.get('verbosity'))
-        # ignore_errors = options.get('ignore_errors')
-        username = options.get('user')
-        user = get_valid_user(username)
-        overwrite = options.get('overwrite')
-        name = options.get('layername', None)
-        title = options.get('title', None)
-        abstract = options.get('abstract', None)
-        date = options.get('date', None)
-        license = options.get('license', None)
-        category = options.get('category', None)
-        private = options.get('private', False)
-        metadata_uploaded_preserve = options.get('metadata_uploaded_preserve',
-                                                 False)
-        charset = options.get('charset', 'UTF-8')
-
-        if verbosity > 0:
-            console = self.stdout
-        else:
-            console = None
-
-        skip = not overwrite
-
-        keywords = options.get('keywords').split(',')
-        if len(keywords) == 1 and keywords[0] == '':
-            keywords = []
-        else:
-            keywords = [k.strip() for k in keywords]
-        regions = options.get('regions').split(',')
-        if len(regions) == 1 and regions[0] == '':
-            regions = []
-        else:
-            regions = [r.strip() for r in regions]
+        host = options.get('host') or "http://localhost:8000"
+        username = options.get('username') or 'admin'
+        password = options.get('password') or "admin"
+        paths = options['path']
+        if not paths:
+            raise CommandError("Provide at least one folder path to import layers from")
+
         start = datetime.datetime.now(timezone.get_current_timezone())
-        output = []
-
-        for path in options['path']:
-            out = upload(
-                path,
-                user=user,
-                overwrite=overwrite,
-                skip=skip,
-                name=name,
-                title=title,
-                abstract=abstract,
-                date=date,
-                keywords=keywords,
-                verbosity=verbosity,
-                console=console,
-                license=license,
-                category=category,
-                regions=regions,
-                private=private,
-                metadata_uploaded_preserve=metadata_uploaded_preserve,
-                charset=charset)
-
-            output.extend(out)
-
-        updated = [dict_['file']
-                   for dict_ in output if dict_['status'] == 'updated']
-        created = [dict_['file']
-                   for dict_ in output if dict_['status'] == 'created']
-        skipped = [dict_['file']
-                   for dict_ in output if dict_['status'] == 'skipped']
-        failed = [dict_['file']
-                  for dict_ in output if dict_['status'] == 'failed']
+
+        uploaded = 0
+        for folder_path in paths:
+            uploaded += GeoNodeUploader(
+                host=host,
+                username=username,
+                password=password,
+                folder_path=folder_path
+            ).execute()
 
         finish = datetime.datetime.now(timezone.get_current_timezone())
         td = finish - start
         duration = td.microseconds / 1000000 + td.seconds + td.days * 24 * 3600
         duration_rounded = round(duration, 2)
 
-        if verbosity > 1:
-            print("\nDetailed report of failures:")
-            for dict_ in output:
-                if dict_['status'] == 'failed':
-                    print("\n\n", dict_['file'], "\n================")
-                    traceback.print_exception(dict_['exception_type'],
-                                              dict_['error'],
-                                              dict_['traceback'])
-
-        if verbosity > 0:
-            print(f"\n\nFinished processing {len(output)} layers in {duration_rounded} seconds.\n")
-            print(f"{len(created)} Created layers")
-            print(f"{len(updated)} Updated layers")
-            print(f"{len(skipped)} Skipped layers")
-            print(f"{len(failed)} Failed layers")
-            if len(output) > 0:
-                print(f"{(duration * 1.0 / len(output))} seconds per layer")
+        print(f"Finished processing {uploaded} layers in {duration_rounded} seconds.")
+        if uploaded:
+            print(f"{duration / uploaded} seconds per layer")
diff --git a/requirements.txt b/requirements.txt
index 108a9f55265..7a4f0880bfa 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -107,7 +107,7 @@ django-storages==1.11.1
 dropbox==11.9.0
 google-cloud-storage==1.38.0
 google-cloud-core==1.6.0
-boto3==1.17.70
+boto3==1.17.74
 
 # Django Caches
 python-memcached<=1.59