Commit

added poetry
SteveMHill committed Dec 14, 2020
1 parent f3c2d2b commit 30e5efd
Showing 589 changed files with 244,741 additions and 188 deletions.
5 changes: 5 additions & 0 deletions build/lib/phenocube/__init__.py
@@ -0,0 +1,5 @@
"""phenocube - phenocube is an python package providing a wide range of tools for working with the phenocube environment"""

__version__ = '0.1.0'
__author__ = 'Steven Hill <[email protected]>'
__all__ = []
38 changes: 38 additions & 0 deletions build/lib/phenocube/clip_xr2gpd.py
@@ -0,0 +1,38 @@
import rasterio.features
import xarray as xr

def clip_xr2gpd(dataset, gpd):
    """
    Clips an xarray Dataset to a geopandas GeoDataFrame.
    Description
    ----------
    Takes an xarray Dataset and a GeoDataFrame and clips the Dataset to the shapes of the GeoDataFrame.
    Parameters
    ----------
    dataset: xr.Dataset
        A multi-dimensional array with x, y and time dimensions and one or more data variables.
    gpd: geopandas.GeoDataFrame
        A GeoDataFrame object with one or more observations or variables and a geometry column. A filtered
        GeoDataFrame can also be used as input.
    Returns
    -------
    masked_dataset: xr.Dataset
        A Dataset like the input, keeping only pixels that fall within the polygons of the GeoDataFrame.
        Every other pixel is set to NaN.
    """

    # select the geometries of the GeoDataFrame and rasterize them into a boolean mask
    ShapeMask = rasterio.features.geometry_mask(gpd.loc[:, "geom"],
                                                out_shape=(len(dataset.latitude), len(dataset.longitude)),
                                                transform=dataset.geobox.transform,
                                                invert=True)
    # wrap the boolean mask in an xarray DataArray so it aligns with the dataset
    ShapeMask = xr.DataArray(ShapeMask, dims=("latitude", "longitude"))
    # keep only the pixels within the polygons; everything else becomes NaN
    masked_dataset = dataset.where(ShapeMask)

    del ShapeMask
    return masked_dataset
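
A minimal usage sketch (not part of the commit): the datacube product, query extent and PostGIS table name below are assumptions for illustration; all clip_xr2gpd needs is a Dataset with a geobox and a GeoDataFrame with a "geom" column.

# Hypothetical usage sketch; product, extent and table names are assumptions.
import datacube
from phenocube.clip_xr2gpd import clip_xr2gpd
from phenocube.load_gdf import load_gdf

dc = datacube.Datacube()
ds = dc.load(product="s2_l2a",                      # assumed product name
             latitude=(49.6, 49.9), longitude=(9.8, 10.2),
             time=("2020-05", "2020-08"))
fields = load_gdf("field_boundaries")               # assumed table; provides the "geom" column
clipped = clip_xr2gpd(ds, fields)                   # pixels outside the polygons become NaN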
42 changes: 42 additions & 0 deletions build/lib/phenocube/dask.py
@@ -0,0 +1,42 @@
from importlib.util import find_spec
import os
import dask
from IPython.display import display
from datacube.utils.dask import start_local_dask

_HAVE_PROXY = bool(find_spec('jupyter_server_proxy'))

def create_local_dask_cluster(workers=1, threads=None, mem_limit=None, spare_mem='20Gb', display_client=True):
    """
    Using the datacube utils function `start_local_dask`, generate
    a local dask cluster.
    Parameters
    ----------
    workers: int
        Number of worker processes to launch.
    threads: int, optional
        Number of threads per worker; defaults to as many as there are CPUs.
    mem_limit: String, optional
        Maximum memory to use across all workers, e.g. '16Gb'.
    spare_mem: String, optional
        The amount of memory, in Gb, to leave for the notebook to run.
        This memory will not be used by the cluster, e.g. '3Gb'.
    display_client: Bool, optional
        Whether to display a summary of the dask client, including a
        link to monitor the progress of the analysis. Set to False to
        hide this display.
    """

    if _HAVE_PROXY:
        # Configure the dashboard link to go through the JupyterHub proxy
        prefix = os.environ.get('JUPYTERHUB_SERVICE_PREFIX', '/')
        dask.config.set({"distributed.dashboard.link":
                         prefix + "proxy/{port}/status"})

    # Start up a local cluster
    client = start_local_dask(n_workers=workers, threads_per_worker=threads,
                              memory_limit=mem_limit, mem_safety_margin=spare_mem)

    # Show the dask cluster settings
    if display_client:
        display(client)
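
A minimal usage sketch (not part of the commit); the worker and memory values are illustrative assumptions.

# Hypothetical usage sketch; worker and memory values are assumptions.
from phenocube.dask import create_local_dask_cluster

create_local_dask_cluster(workers=2, threads=4, mem_limit='16Gb', spare_mem='4Gb')
# The client created by start_local_dask registers itself as the default dask
# scheduler, so subsequent dask-backed loads (dask_chunks=...) use this cluster.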
106 changes: 106 additions & 0 deletions build/lib/phenocube/display_map_polygons.py
@@ -0,0 +1,106 @@
import folium
import math
import numpy as np

def _degree_to_zoom_level(l1, l2, margin=0.0):
    # translate a lat/lon span into a folium zoom level
    degree = abs(l1 - l2) * (1 + margin)
    if degree != 0:
        zoom_level_int = int(math.log(360 / degree) / math.log(2))
    else:
        zoom_level_int = 18
    return zoom_level_int

def display_map_polygons(gdf=None, tooltip_attributes=None, longitude=None, latitude=None):
    """
    Generates a folium map based on a lat-lon bounded rectangle.
    Description
    ----------
    Takes a geodataframe and plots its shapes on a folium map. The extent is defined by a lat-lon
    bounded rectangle. Column values from the geodataframe can also be displayed as tooltips for
    individual shapes.
    Parameters
    ----------
    gdf: geopandas.GeoDataFrame
        A geodataframe object with one or more observations or variables and a geometry column. A filtered
        geodataframe can also be used as input.
    tooltip_attributes: list of string
        Column names of the geodataframe. The values of the given columns are displayed for each
        shape on the folium map when hovering over it.
    latitude: (float, float)
        A tuple of latitude bounds in (min, max) format.
    longitude: (float, float)
        A tuple of longitude bounds in (min, max) format.
    Returns
    ----------
    folium.Map
        A map centered on the lat-lon bounds, displaying all shapes of the geodataframe with the
        requested column information.
    .. _Folium:
        https://github.com/python-visualization/folium
    """
    assert latitude is not None
    assert longitude is not None

    ###### ###### ###### CALC ZOOM LEVEL ###### ###### ######
    margin = -0.5
    zoom_bias = 0

    lat_zoom_level = _degree_to_zoom_level(*latitude, margin=margin) + zoom_bias
    lon_zoom_level = _degree_to_zoom_level(*longitude, margin=margin) + zoom_bias
    zoom_level = min(lat_zoom_level, lon_zoom_level)

    ###### ###### ###### CENTER POINT ###### ###### ######
    center = [np.mean(latitude), np.mean(longitude)]

    ###### ###### ###### CREATE MAP ###### ###### ######
    map_hybrid = folium.Map(
        location=center,
        zoom_start=zoom_level,
        control_scale=True
    )
    folium.TileLayer(
        tiles='https://mt1.google.com/vt/lyrs=y&x={x}&y={y}&z={z}',
        attr='Google',
        name='Google Satellite',
        overlay=True,
        control=True
    ).add_to(map_hybrid)

    ###### ###### ###### POLYGONS ###### ###### ######
    if gdf is not None:
        gjson = gdf.to_json()
        folium.GeoJson(
            gjson,
            name='Fields',
            style_function=lambda feature: {
                'fillColor': 'white',
                'color': 'red',
                'weight': 3,
                'fillOpacity': 0.1
            },
            highlight_function=lambda x: {
                'weight': 5,
                'fillOpacity': 0.5
            },
            tooltip=folium.features.GeoJsonTooltip(
                fields=tooltip_attributes
            )
        ).add_to(map_hybrid)

    folium.LayerControl().add_to(map_hybrid)
    return map_hybrid
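
A minimal usage sketch (not part of the commit), reusing the hypothetical `fields` GeoDataFrame from the clip_xr2gpd example; the bounds and column names are assumptions.

# Hypothetical usage sketch; bounds and column names are assumptions.
from phenocube.display_map_polygons import display_map_polygons

m = display_map_polygons(gdf=fields,
                         tooltip_attributes=["crop_type", "year"],
                         longitude=(9.8, 10.2), latitude=(49.6, 49.9))
m  # renders the interactive map in a Jupyter notebook cell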


123 changes: 123 additions & 0 deletions build/lib/phenocube/extract.py
@@ -0,0 +1,123 @@
import rasterio.features
import xarray as xr
import pandas as pd
import numpy as np
import warnings


def extract(dataset, gpd, bands, func="raw", na_rm=True):
    """
    Extracts the values of an xarray Dataset at the locations of spatial data, returning either all
    cell values or zonal statistics for each shape.
    Description
    ----------
    Extracts values from an xarray Dataset at the locations of other spatial data. Points, lines or
    polygons can be used in the form of a geopandas GeoDataFrame. The values are extracted for every
    band defined in "bands". It is possible to get all the "raw" data values (all values which fall
    within a shape) or to use a predefined function (e.g. mean, min, max, std) to calculate zonal
    statistics over all the values of a shape.
    Parameters
    ----------
    dataset: xr.Dataset
        A multi-dimensional array with x, y and time dimensions and one or more data variables.
    gpd: geopandas.GeoDataFrame
        A geodataframe object with one or more observations or variables and a geometry column. A filtered
        geodataframe can also be used as input.
    bands: list of string
        List with band names of the dataset, e.g. ["blue", "green", "red"], for which values should be
        extracted.
    func: string (default = "raw")
        Name of a statistics function: "mean", "max", "min" or "std". If a function is selected, the
        values are aggregated with it, giving one value per band and shape. The default "raw" applies
        no function and stores all extracted values in the output dictionary.
    na_rm: bool (default = True)
        If True, all nan values are removed from the results. If False, the results keep their nan
        values. If a statistics function is set in "func", nan values are ignored in either case.
    Returns
    -------
    results_scenes: dictionary
        Dict containing one dataframe per scene (timestep) of the dataset, holding the values of all
        bands, or the values calculated by the chosen function, for the pixels located within the
        spatial data. Each dataframe carries the values per band and the same index as gpd, so the
        values can be assigned to a specific shape. To display the extracted values of a single
        timestep (scene), select the key of the desired timestep in the output dictionary, e.g.
        example = extract(dataset, gpd, bands=["blue", "green", "red"], func="mean")
        example["scene_index_0"]
    """

    warnings.filterwarnings("ignore")  # ignore warnings raised for all-nan slices

    results_scenes = {}  # dict for storing the per-scene result dataframes
    index = gpd.index  # index values of the polygons

    for t in range(len(dataset.time)):  # iterate over the single scenes of the dataset
        scene = dataset.isel(time=t)

        results_df = {}  # dict for storing the band values of each shape
        for i in index:
            vec = gpd.loc[i]
            # rasterize the geometry of shape i into a boolean mask (wrapped in a
            # list because geometry_mask expects an iterable of geometries)
            ShapeMask = rasterio.features.geometry_mask([vec["geom"]],
                                                        out_shape=(len(scene.latitude), len(scene.longitude)),
                                                        transform=scene.geobox.transform,
                                                        invert=True)
            # wrap the boolean mask in an xarray DataArray so it aligns with the scene
            ShapeMask = xr.DataArray(ShapeMask, dims=("latitude", "longitude"))

            # keep only the pixels within the polygon; everything else becomes NaN
            masked_dataset = scene.where(ShapeMask)

            results = {}
            for j in bands:
                values = masked_dataset[j].values
                if na_rm:  # remove all nan values before storing or aggregating
                    values = values[np.logical_not(np.isnan(values))]
                if func == "raw":  # store the raw pixel values
                    results[j] = pd.DataFrame({i: [values]})
                elif func == "mean":  # nan-aware statistics; identical to the plain
                    # statistics when the values were already stripped of nans above
                    results[j] = pd.DataFrame({i: [np.nanmean(values)]})
                elif func == "min":
                    results[j] = pd.DataFrame({i: [np.nanmin(values)]})
                elif func == "max":
                    results[j] = pd.DataFrame({i: [np.nanmax(values)]})
                elif func == "std":
                    results[j] = pd.DataFrame({i: [np.nanstd(values)]})
                else:
                    raise ValueError('func must be one of "raw", "mean", "min", "max" or "std"')

            vec_df = pd.concat(results, axis=1)  # concatenate all band values of shape i into one dataframe
            vec_df.columns = bands  # rename the columns to the band names
            vec_df["id"] = i  # add the index of the polygon

            results_df[str(i)] = vec_df  # store the dataframe of shape i in the dictionary

        df = pd.concat(results_df, ignore_index=True)  # concatenate the dataframes of all shapes
        df = df.set_index("id")  # set the index to the shape index
        results_scenes["scene_index_" + str(t)] = df  # store the per-scene dataframe in the output dict

    return results_scenes
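
A minimal usage sketch (not part of the commit), reusing the hypothetical `ds` and `fields` objects from the clip_xr2gpd example; the band names are assumptions.

# Hypothetical usage sketch; band names are assumptions.
from phenocube.extract import extract

stats = extract(ds, fields, bands=["red", "nir"], func="mean")
stats["scene_index_0"]  # per-polygon band means for the first timestep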
51 changes: 51 additions & 0 deletions build/lib/phenocube/load_gdf.py
@@ -0,0 +1,51 @@
import geopandas as gpd
import psycopg2

def load_gdf(filename, cols="all"):
    """
    Loads a vector dataset from the PostGIS database and returns it as a gpd.GeoDataFrame.
    Description
    ----------
    Connects to the PostGIS database and loads a vector dataset from there. The vector dataset must be
    imported into the PostGIS database in advance (this can be done by a user of the Department of Remote
    Sensing at the University of Würzburg). The function retrieves the file by its name in the PostGIS
    database. It is possible to define which columns should be included; the default is to load all
    existing columns.
    Parameters
    ----------
    filename: string
        The name of the vector dataset in the PostGIS database. If unsure, ask the staff member of the
        Department of Remote Sensing who imported your dataset into the PostGIS database.
    cols: list or string (default = "all")
        List with column names of the vector dataset, e.g. ["column1", "column2", "column3"], which
        should be loaded. If used, only the "real" existing columns need to be defined; the "geom"
        column and the "gid" column (the PostGIS index) are always loaded. The default "all" loads all
        columns of the vector dataset.
    Returns
    -------
    gdf: gpd.GeoDataFrame
        Geodataframe from the PostGIS database with the requested (or all) columns.
    """

    # PostGIS credentials
    user = 'dc_user'
    password = 'localuser1234'
    host = '127.0.0.1'
    port = '5432'
    database = 'datacube'

    # connect to PostGIS
    connection = psycopg2.connect(database=database, user=user, password=password, host=host, port=port)
    if isinstance(cols, list):
        # load only the requested attribute columns (plus geom and gid)
        sql = "select geom, gid, " + ", ".join(cols) + " from " + filename + ";"
    elif cols == "all":
        # load all attribute columns
        sql = "select * from " + filename + ";"
    else:
        connection.close()
        raise ValueError('cols must be a list of column names or "all"')
    gdf = gpd.GeoDataFrame.from_postgis(sql, connection)  # build a GeoDataFrame from the query result

    connection.close()

    return gdf
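
A minimal usage sketch (not part of the commit); the table and column names are assumptions.

# Hypothetical usage sketch; table and column names are assumptions.
from phenocube.load_gdf import load_gdf

fields = load_gdf("field_boundaries", cols=["crop_type", "year"])
fields.head()  # geom and gid are always included alongside the requested columns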
