Commit

added poetry
SteveMHill committed Dec 14, 2020
1 parent f3c2d2b commit 30e5efd
Showing 589 changed files with 244,741 additions and 188 deletions.
5 changes: 5 additions & 0 deletions build/lib/phenocube/__init__.py
@@ -0,0 +1,5 @@
"""phenocube - phenocube is an python package providing a wide range of tools for working with the phenocube environment"""

__version__ = '0.1.0'
__author__ = 'Steven Hill <[email protected]>'
__all__ = []
38 changes: 38 additions & 0 deletions build/lib/phenocube/clip_xr2gpd.py
@@ -0,0 +1,38 @@
import rasterio.features
import xarray as xr

def clip_xr2gpd(dataset, gpd):
    """
    Clips an xarray Dataset to a geopandas GeoDataFrame.
    Description
    ----------
    Takes an xarray Dataset and a GeoDataFrame and clips the Dataset to the shapes of the GeoDataFrame.
    Parameters
    ----------
    dataset: xr.Dataset
        A multi-dimensional array with x, y and time dimensions and one or more data variables.
    gpd: geopandas.GeoDataFrame
        A GeoDataFrame object with one or more observations or variables and a geometry column. A filtered
        GeoDataFrame can also be used as input.
    Returns
    -------
    masked_dataset: xr.Dataset
        A Dataset like the input, keeping only pixels that fall within the polygons of the GeoDataFrame.
        Every other pixel is set to NaN.
    """

    # select the geometries of the GeoDataFrame and rasterize them into a boolean mask
    ShapeMask = rasterio.features.geometry_mask(gpd.loc[:, "geom"],
                                                out_shape=(len(dataset.latitude), len(dataset.longitude)),
                                                transform=dataset.geobox.transform,
                                                invert=True)
    # wrap the boolean mask in an xarray DataArray so it aligns with the dataset
    ShapeMask = xr.DataArray(ShapeMask, dims=("latitude", "longitude"))
    # keep only the pixels within the polygons; everything else becomes NaN
    masked_dataset = dataset.where(ShapeMask)

    del ShapeMask
    return masked_dataset
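
A minimal usage sketch (not part of the commit): the datacube product, query extent and PostGIS table name below are assumptions for illustration; all clip_xr2gpd needs is a Dataset with a geobox and a GeoDataFrame with a "geom" column.

# Hypothetical usage sketch; product, extent and table names are assumptions.
import datacube
from phenocube.clip_xr2gpd import clip_xr2gpd
from phenocube.load_gdf import load_gdf

dc = datacube.Datacube()
ds = dc.load(product="s2_l2a",                      # assumed product name
             latitude=(49.6, 49.9), longitude=(9.8, 10.2),
             time=("2020-05", "2020-08"))
fields = load_gdf("field_boundaries")               # assumed table; provides the "geom" column
clipped = clip_xr2gpd(ds, fields)                   # pixels outside the polygons become NaN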
42 changes: 42 additions & 0 deletions build/lib/phenocube/dask.py
@@ -0,0 +1,42 @@
from importlib.util import find_spec
import os
import dask
from IPython.display import display
from datacube.utils.dask import start_local_dask

_HAVE_PROXY = bool(find_spec('jupyter_server_proxy'))

def create_local_dask_cluster(workers=1, threads=None, mem_limit=None, spare_mem='20Gb', display_client=True):
    """
    Using the datacube utils function `start_local_dask`, generate
    a local dask cluster.
    Parameters
    ----------
    workers: int
        Number of worker processes to launch.
    threads: int, optional
        Number of threads per worker; defaults to as many as there are CPUs.
    mem_limit: String, optional
        Maximum memory to use across all workers, e.g. '16Gb'.
    spare_mem: String, optional
        The amount of memory, in Gb, to leave for the notebook to run.
        This memory will not be used by the cluster, e.g. '3Gb'.
    display_client: Bool, optional
        Whether to display a summary of the dask client, including a
        link to monitor the progress of the analysis. Set to False to
        hide this display.
    """

    if _HAVE_PROXY:
        # Configure the dashboard link to go through the JupyterHub proxy
        prefix = os.environ.get('JUPYTERHUB_SERVICE_PREFIX', '/')
        dask.config.set({"distributed.dashboard.link":
                         prefix + "proxy/{port}/status"})

    # Start up a local cluster
    client = start_local_dask(n_workers=workers, threads_per_worker=threads,
                              memory_limit=mem_limit, mem_safety_margin=spare_mem)

    # Show the dask cluster settings
    if display_client:
        display(client)
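
A minimal usage sketch (not part of the commit); the worker and memory values are illustrative assumptions.

# Hypothetical usage sketch; worker and memory values are assumptions.
from phenocube.dask import create_local_dask_cluster

create_local_dask_cluster(workers=2, threads=4, mem_limit='16Gb', spare_mem='4Gb')
# The client created by start_local_dask registers itself as the default dask
# scheduler, so subsequent dask-backed loads (dask_chunks=...) use this cluster.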
106 changes: 106 additions & 0 deletions build/lib/phenocube/display_map_polygons.py
@@ -0,0 +1,106 @@
import folium
import math
import numpy as np

def _degree_to_zoom_level(l1, l2, margin=0.0):
    # translate a lat/lon span into a folium zoom level
    degree = abs(l1 - l2) * (1 + margin)
    if degree != 0:
        zoom_level_int = int(math.log(360 / degree) / math.log(2))
    else:
        zoom_level_int = 18
    return zoom_level_int

def display_map_polygons(gdf=None, tooltip_attributes=None, longitude=None, latitude=None):
    """
    Generates a folium map based on a lat-lon bounded rectangle.
    Description
    ----------
    Takes a geodataframe and plots its shapes on a folium map. The extent is defined by a lat-lon
    bounded rectangle. Column values from the geodataframe can also be displayed as tooltips for
    individual shapes.
    Parameters
    ----------
    gdf: geopandas.GeoDataFrame
        A geodataframe object with one or more observations or variables and a geometry column. A filtered
        geodataframe can also be used as input.
    tooltip_attributes: list of string
        Column names of the geodataframe. The values of the given columns are displayed for each
        shape on the folium map when hovering over it.
    latitude: (float, float)
        A tuple of latitude bounds in (min, max) format.
    longitude: (float, float)
        A tuple of longitude bounds in (min, max) format.
    Returns
    ----------
    folium.Map
        A map centered on the lat-lon bounds, displaying all shapes of the geodataframe with the
        requested column information.
    .. _Folium:
        https://github.com/python-visualization/folium
    """
    assert latitude is not None
    assert longitude is not None

    ###### ###### ###### CALC ZOOM LEVEL ###### ###### ######
    margin = -0.5
    zoom_bias = 0

    lat_zoom_level = _degree_to_zoom_level(*latitude, margin=margin) + zoom_bias
    lon_zoom_level = _degree_to_zoom_level(*longitude, margin=margin) + zoom_bias
    zoom_level = min(lat_zoom_level, lon_zoom_level)

    ###### ###### ###### CENTER POINT ###### ###### ######
    center = [np.mean(latitude), np.mean(longitude)]

    ###### ###### ###### CREATE MAP ###### ###### ######
    map_hybrid = folium.Map(
        location=center,
        zoom_start=zoom_level,
        control_scale=True
    )
    folium.TileLayer(
        tiles='https://mt1.google.com/vt/lyrs=y&x={x}&y={y}&z={z}',
        attr='Google',
        name='Google Satellite',
        overlay=True,
        control=True
    ).add_to(map_hybrid)

    ###### ###### ###### POLYGONS ###### ###### ######
    if gdf is not None:
        gjson = gdf.to_json()
        folium.GeoJson(
            gjson,
            name='Fields',
            style_function=lambda feature: {
                'fillColor': 'white',
                'color': 'red',
                'weight': 3,
                'fillOpacity': 0.1
            },
            highlight_function=lambda x: {
                'weight': 5,
                'fillOpacity': 0.5
            },
            tooltip=folium.features.GeoJsonTooltip(
                fields=tooltip_attributes
            )
        ).add_to(map_hybrid)

    folium.LayerControl().add_to(map_hybrid)
    return map_hybrid
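
A minimal usage sketch (not part of the commit), reusing the hypothetical `fields` GeoDataFrame from the clip_xr2gpd example; the bounds and column names are assumptions.

# Hypothetical usage sketch; bounds and column names are assumptions.
from phenocube.display_map_polygons import display_map_polygons

m = display_map_polygons(gdf=fields,
                         tooltip_attributes=["crop_type", "year"],
                         longitude=(9.8, 10.2), latitude=(49.6, 49.9))
m  # renders the interactive map in a Jupyter notebook cell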


123 changes: 123 additions & 0 deletions build/lib/phenocube/extract.py
@@ -0,0 +1,123 @@
import rasterio.features
import xarray as xr
import pandas as pd
import numpy as np
import warnings


def extract(dataset, gpd, bands, func="raw", na_rm=True):
    """
    Extracts the values of an xarray Dataset at the locations of spatial data, returning either all
    cell values or zonal statistics for each shape.
    Description
    ----------
    Extracts values from an xarray Dataset at the locations of other spatial data. Points, lines or
    polygons can be used in the form of a geopandas GeoDataFrame. The values are extracted for every
    band defined in "bands". It is possible to get all the "raw" data values (all values which fall
    within a shape) or to use a predefined function (e.g. mean, min, max, std) to calculate zonal
    statistics over all the values of a shape.
    Parameters
    ----------
    dataset: xr.Dataset
        A multi-dimensional array with x, y and time dimensions and one or more data variables.
    gpd: geopandas.GeoDataFrame
        A geodataframe object with one or more observations or variables and a geometry column. A filtered
        geodataframe can also be used as input.
    bands: list of string
        List with band names of the dataset, e.g. ["blue", "green", "red"], for which values should be
        extracted.
    func: string (default = "raw")
        Name of a statistics function: "mean", "max", "min" or "std". If a function is selected, the
        values are aggregated with it, giving one value per band and shape. The default "raw" applies
        no function and stores all extracted values in the output dictionary.
    na_rm: bool (default = True)
        If True, all nan values are removed from the results. If False, the results keep their nan
        values. If a statistics function is set in "func", nan values are ignored in either case.
    Returns
    -------
    results_scenes: dictionary
        Dict containing one dataframe per scene (timestep) of the dataset, holding the values of all
        bands, or the values calculated by the chosen function, for the pixels located within the
        spatial data. Each dataframe carries the values per band and the same index as gpd, so the
        values can be assigned to a specific shape. To display the extracted values of a single
        timestep (scene), select the key of the desired timestep in the output dictionary, e.g.
        example = extract(dataset, gpd, bands=["blue", "green", "red"], func="mean")
        example["scene_index_0"]
    """

    warnings.filterwarnings("ignore")  # ignore warnings raised for all-nan slices

    results_scenes = {}  # dict for storing the per-scene result dataframes
    index = gpd.index  # index values of the polygons

    for t in range(len(dataset.time)):  # iterate over the single scenes of the dataset
        scene = dataset.isel(time=t)

        results_df = {}  # dict for storing the band values of each shape
        for i in index:
            vec = gpd.loc[i]
            # rasterize the geometry of shape i into a boolean mask (wrapped in a
            # list because geometry_mask expects an iterable of geometries)
            ShapeMask = rasterio.features.geometry_mask([vec["geom"]],
                                                        out_shape=(len(scene.latitude), len(scene.longitude)),
                                                        transform=scene.geobox.transform,
                                                        invert=True)
            # wrap the boolean mask in an xarray DataArray so it aligns with the scene
            ShapeMask = xr.DataArray(ShapeMask, dims=("latitude", "longitude"))

            # keep only the pixels within the polygon; everything else becomes NaN
            masked_dataset = scene.where(ShapeMask)

            results = {}
            for j in bands:
                values = masked_dataset[j].values
                if na_rm:  # remove all nan values before storing or aggregating
                    values = values[np.logical_not(np.isnan(values))]
                if func == "raw":  # store the raw pixel values
                    results[j] = pd.DataFrame({i: [values]})
                elif func == "mean":  # nan-aware statistics; identical to the plain
                    # statistics when the values were already stripped of nans above
                    results[j] = pd.DataFrame({i: [np.nanmean(values)]})
                elif func == "min":
                    results[j] = pd.DataFrame({i: [np.nanmin(values)]})
                elif func == "max":
                    results[j] = pd.DataFrame({i: [np.nanmax(values)]})
                elif func == "std":
                    results[j] = pd.DataFrame({i: [np.nanstd(values)]})
                else:
                    raise ValueError('func must be one of "raw", "mean", "min", "max" or "std"')

            vec_df = pd.concat(results, axis=1)  # concatenate all band values of shape i into one dataframe
            vec_df.columns = bands  # rename the columns to the band names
            vec_df["id"] = i  # add the index of the polygon

            results_df[str(i)] = vec_df  # store the dataframe of shape i in the dictionary

        df = pd.concat(results_df, ignore_index=True)  # concatenate the dataframes of all shapes
        df = df.set_index("id")  # set the index to the shape index
        results_scenes["scene_index_" + str(t)] = df  # store the per-scene dataframe in the output dict

    return results_scenes
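
A minimal usage sketch (not part of the commit), reusing the hypothetical `ds` and `fields` objects from the clip_xr2gpd example; the band names are assumptions.

# Hypothetical usage sketch; band names are assumptions.
from phenocube.extract import extract

stats = extract(ds, fields, bands=["red", "nir"], func="mean")
stats["scene_index_0"]  # per-polygon band means for the first timestep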
51 changes: 51 additions & 0 deletions build/lib/phenocube/load_gdf.py
@@ -0,0 +1,51 @@
import geopandas as gpd
import psycopg2

def load_gdf(filename, cols="all"):
    """
    Loads a vector dataset from the PostGIS database and returns it as a gpd.GeoDataFrame.
    Description
    ----------
    Connects to the PostGIS database and loads a vector dataset from there. The vector dataset must be
    imported into the PostGIS database in advance (this can be done by a user of the Department of Remote
    Sensing at the University of Würzburg). The function retrieves the file by its name in the PostGIS
    database. It is possible to define which columns should be included; the default is to load all
    existing columns.
    Parameters
    ----------
    filename: string
        The name of the vector dataset in the PostGIS database. If unsure, ask the staff member of the
        Department of Remote Sensing who imported your dataset into the PostGIS database.
    cols: list or string (default = "all")
        List with column names of the vector dataset, e.g. ["column1", "column2", "column3"], which
        should be loaded. If used, only the "real" existing columns need to be defined; the "geom"
        column and the "gid" column (the PostGIS index) are always loaded. The default "all" loads all
        columns of the vector dataset.
    Returns
    -------
    gdf: gpd.GeoDataFrame
        Geodataframe from the PostGIS database with the requested (or all) columns.
    """

    # PostGIS credentials
    user = 'dc_user'
    password = 'localuser1234'
    host = '127.0.0.1'
    port = '5432'
    database = 'datacube'

    # connect to PostGIS
    connection = psycopg2.connect(database=database, user=user, password=password, host=host, port=port)
    if isinstance(cols, list):
        # load only the requested attribute columns (plus geom and gid)
        sql = "select geom, gid, " + ", ".join(cols) + " from " + filename + ";"
    elif cols == "all":
        # load all attribute columns
        sql = "select * from " + filename + ";"
    else:
        connection.close()
        raise ValueError('cols must be a list of column names or "all"')
    gdf = gpd.GeoDataFrame.from_postgis(sql, connection)  # build a GeoDataFrame from the query result

    connection.close()

    return gdf
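
A minimal usage sketch (not part of the commit); the table and column names are assumptions.

# Hypothetical usage sketch; table and column names are assumptions.
from phenocube.load_gdf import load_gdf

fields = load_gdf("field_boundaries", cols=["crop_type", "year"])
fields.head()  # geom and gid are always included alongside the requested columns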
