-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f3c2d2b
commit 30e5efd
Showing
589 changed files
with
244,741 additions
and
188 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
"""phenocube - phenocube is an python package providing a wide range of tools for working with the phenocube environment""" | ||
|
||
__version__ = '0.1.0' | ||
__author__ = 'Steven Hill <[email protected]>' | ||
__all__ = [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import rasterio.features | ||
import xarray as xr | ||
|
||
def clip_xr2gpd(dataset, gpd):
    """
    Clip an xarray Dataset to the shapes of a GeoDataFrame.

    Description
    ----------
    Takes an xarray dataset and a GeoDataFrame and masks the dataset so that
    only pixels falling inside the GeoDataFrame's polygons keep their values.

    Parameters
    ----------
    dataset: xr.Dataset
        A multi-dimensional array with x, y and time dimensions and one or
        more data variables. Must expose ``latitude``/``longitude``
        dimensions and a datacube-style ``geobox`` attribute, from which the
        affine transform is read.
    gpd: geopandas.GeoDataFrame
        A geodataframe with one or more observations or variables and a
        geometry column named "geom". A filtered geodataframe can also be
        used as input.

    Returns
    -------
    masked_dataset: xr.Dataset
        A dataset like the input with only the pixels within the polygons of
        the geodataframe retained; every other pixel is set to NaN.
    """
    # Rasterize the vector geometries into a boolean mask
    # (True for pixels inside the shapes, because invert=True).
    shape_mask = rasterio.features.geometry_mask(
        gpd.loc[:, "geom"],
        out_shape=(len(dataset.latitude), len(dataset.longitude)),
        transform=dataset.geobox.transform,
        invert=True,
    )
    # Wrap as a DataArray so xarray can broadcast it across the dataset.
    shape_mask = xr.DataArray(shape_mask, dims=("latitude", "longitude"))
    # where() keeps pixels where the mask is True and fills the rest with
    # NaN.  (Comparing a boolean array `== True` is redundant, and the
    # original `del` of a local right before returning had no effect.)
    return dataset.where(shape_mask)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
from importlib.util import find_spec | ||
import os | ||
import dask | ||
from IPython.display import display | ||
from datacube.utils.dask import start_local_dask | ||
|
||
_HAVE_PROXY = bool(find_spec('jupyter_server_proxy')) | ||
|
||
def create_local_dask_cluster(workers=1, threads=None, mem_limit=None, spare_mem='20Gb', display_client=True):
    """
    Using the datacube utils function `start_local_dask`, generate
    a local dask cluster.

    Parameters
    ----------
    workers: int, optional
        Number of worker processes to launch. Defaults to 1.
    threads: int, optional
        Number of threads per worker; default is as many as there are CPUs.
    mem_limit: String, optional
        Maximum memory to use across all workers, e.g. '8Gb'.
    spare_mem: String, optional
        The amount of memory, in Gb, to leave for the notebook to run.
        This memory will not be used by the cluster. e.g '3Gb'.
    display_client: Bool, optional
        An optional boolean indicating whether to display a summary of
        the dask client, including a link to monitor progress of the
        analysis. Set to False to hide this display.

    Returns
    -------
    distributed.Client
        The client connected to the newly started local cluster, so callers
        can submit work to it or close it.  (The original version discarded
        the client it created.)
    """
    if _HAVE_PROXY:
        # Route the dashboard link through jupyter-server-proxy so it is
        # reachable behind JupyterHub.
        prefix = os.environ.get('JUPYTERHUB_SERVICE_PREFIX', '/')
        dask.config.set({"distributed.dashboard.link":
                         prefix + "proxy/{port}/status"})

    # Start up a local cluster.
    client = start_local_dask(n_workers=workers,
                              threads_per_worker=threads,
                              memory_limit=mem_limit,
                              mem_safety_margin=spare_mem)

    # Show the dask cluster settings.
    if display_client:
        display(client)

    return client
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
import folium | ||
import itertools | ||
import math | ||
import numpy as np | ||
|
||
def _degree_to_zoom_level(l1, l2, margin = 0.0): | ||
|
||
degree = abs(l1 - l2) * (1 + margin) | ||
zoom_level_int = 0 | ||
if degree != 0: | ||
zoom_level_float = math.log(360/degree)/math.log(2) | ||
zoom_level_int = int(zoom_level_float) | ||
else: | ||
zoom_level_int = 18 | ||
return zoom_level_int | ||
|
||
def display_map_polygons(gdf=None, tooltip_attributes=None, longitude=None, latitude=None):
    """
    Generates a folium map based on a lat-lon bounded rectangle.

    Description
    ----------
    Takes a geodataframe and plots the shapes of the geodataframe in a
    folium map. The extent is defined by a lat-lon bounded rectangle. Also
    information or column values from the geodataframe can be defined for
    single shapes.

    Parameters
    ----------
    gdf: geopandas.GeoDataFrame, optional
        A geodataframe with one or more observations or variables and a
        geometry column. A filtered geodataframe can also be used as input.
    tooltip_attributes: (string, string), optional
        A tuple of column names of the geodataframe. The value of the
        defined columns are displayed for each observation in the folium
        map by clicking on the shape.
    latitude: (float, float)
        A tuple of latitude bounds in (min, max) format.
    longitude: (float, float)
        A tuple of longitude bounds in (min, max) format.

    Returns
    ----------
    folium.Map
        A map centered on the lat lon bounds displaying all shapes of the
        geodataframe with the defined column informations.

    Raises
    ------
    ValueError
        If latitude or longitude bounds are missing.

    .. _Folium
        https://github.com/python-visualization/folium
    """
    # Real exceptions instead of `assert`, which is stripped under -O.
    if latitude is None or longitude is None:
        raise ValueError("Both latitude and longitude bounds must be provided")

    # ---- zoom level ------------------------------------------------------
    margin = -0.5
    zoom_bias = 0
    lat_zoom_level = _degree_to_zoom_level(margin=margin, *latitude) + zoom_bias
    lon_zoom_level = _degree_to_zoom_level(margin=margin, *longitude) + zoom_bias
    zoom_level = min(lat_zoom_level, lon_zoom_level)

    # ---- center point ----------------------------------------------------
    center = [np.mean(latitude), np.mean(longitude)]

    # ---- create map ------------------------------------------------------
    map_hybrid = folium.Map(
        location=center,
        zoom_start=zoom_level,
        control_scale=True
    )
    folium.TileLayer(
        tiles='https://mt1.google.com/vt/lyrs=y&x={x}&y={y}&z={z}',
        attr='Google',
        name='Google Satellite',
        overlay=True,
        control=True
    ).add_to(map_hybrid)

    # ---- polygons --------------------------------------------------------
    if gdf is not None:
        gjson = gdf.to_json()
        # (A stray `folium.features.GeoJson(gjson)` object that was built and
        # immediately discarded in the original has been removed.)
        folium.GeoJson(
            gjson,
            name='Felder',
            style_function=lambda feature: {
                'fillColor': 'white',
                'color': 'red',
                'weight': 3,
                'fillOpacity': 0.1
            },
            highlight_function=lambda x: {
                'weight': 5,
                'fillOpacity': 0.5
            },
            tooltip=folium.features.GeoJsonTooltip(
                fields=tooltip_attributes
            )
        ).add_to(map_hybrid)

    folium.LayerControl().add_to(map_hybrid)
    return map_hybrid
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
import rasterio.features | ||
import xarray as xr | ||
import pandas as pd | ||
import numpy as np | ||
import geopandas as gpd | ||
import warnings | ||
|
||
|
||
def extract(dataset, gpd, bands, func="raw", na_rm=True):
    """
    Extracts values of an xarray Dataset at the locations of spatial data
    and gives out all cell values or calculates zonal statistics for the
    spatial data.

    Description
    ----------
    Extract values from an xarray Dataset at the locations of other spatial
    data. You can use points, lines, or polygons in the form of a
    GeoDataFrame. The values are extracted for every band defined in
    ``bands``. It is possible to get all the "raw" data values (all values
    which are within the shape) for a spatial data shape or to use a
    predefined function (e.g. mean, min, max, std) to calculate zonal
    statistics of all the values of the spatial data shape.

    Parameters
    ----------
    dataset: xr.Dataset
        A multi-dimensional array with x, y and time dimensions and one or
        more data variables.
    gpd: geopandas.GeoDataFrame
        A geodataframe with one or more observations or variables and a
        geometry column named "geom". A filtered geodataframe can also be
        used as input.
    bands: list of string
        List with band names of dataset, e.g. ["blue", "green", "red"], for
        which values should be extracted.
    func: string, (default = "raw")
        One of "raw", "mean", "min", "max", "std". With "raw" no function is
        applied and all pixel values are stored; otherwise one statistic per
        band and shape is calculated.
    na_rm: bool, (default = True)
        If True all NaN values are removed from the results before they are
        stored. The statistics ignore NaN values in either case, because
        nan-aware numpy functions are used (on NaN-free input they equal
        their plain counterparts).

    Returns
    -------
    results_scenes: dictionary
        Dict which contains one dataframe per scene (timestep) of the
        dataset, with the values of all bands (or the statistic calculated
        from them) for every shape. Each dataframe carries the same index as
        ``gpd`` so values can be assigned to a specific shape, e.g.::

            example = extract(dataset, gpd, bands=["blue", "green", "red"], func="mean")
            example["scene_index_0"]

    Raises
    ------
    ValueError
        If ``func`` is not a supported name. (The original implementation
        silently produced incomplete results in that case.)
    """
    warnings.filterwarnings("ignore")  # silence all-NaN / empty-slice warnings

    # One nan-aware function per statistic replaces the duplicated
    # if/elif ladders of the original (one ladder per na_rm branch).
    stat_funcs = {
        "mean": np.nanmean,
        "min": np.nanmin,
        "max": np.nanmax,
        "std": np.nanstd,
    }
    if func != "raw" and func not in stat_funcs:
        raise ValueError(
            "func must be one of 'raw', 'mean', 'min', 'max', 'std', got %r" % (func,)
        )

    results_scenes = {}  # one dataframe per scene, keyed "scene_index_<t>"
    index = gpd.index    # shape indices, reused as the output row index

    for t in range(len(dataset.time)):  # iterate over the single scenes
        scene = dataset.isel(time=t)

        results_df = {}  # per-shape dataframes for this scene
        for i in index:
            vec = gpd.loc[i]
            # Rasterize this shape's geometry into a boolean pixel mask.
            shape_mask = rasterio.features.geometry_mask(
                vec["geom"],
                out_shape=(len(scene.latitude), len(scene.longitude)),
                transform=scene.geobox.transform,
                invert=True)
            shape_mask = xr.DataArray(shape_mask, dims=("latitude", "longitude"))

            # Keep only the pixels inside the shape; the rest become NaN.
            masked_dataset = scene.where(shape_mask)

            results = {}
            for band in bands:
                values = masked_dataset[band].values
                if na_rm:
                    # Drop NaNs (pixels outside the shape / nodata) up front.
                    values = values[np.logical_not(np.isnan(values))]
                if func == "raw":
                    results[band] = pd.DataFrame({i: [values]})
                else:
                    results[band] = pd.DataFrame({i: [stat_funcs[func](values)]})

            vec_df = pd.concat(results, axis=1)  # all band values of shape i
            vec_df.columns = bands               # rename columns to band names
            vec_df["id"] = i                     # remember the shape index

            results_df[str(i)] = vec_df

        # One row per shape, indexed by the original shape index.
        df = pd.concat(results_df, ignore_index=True)
        df = df.set_index("id")
        results_scenes["scene_index_" + str(t)] = df

    return results_scenes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import geopandas as gpd | ||
import psycopg2 | ||
|
||
def load_gdf(filename, cols="all"):
    """
    Loads a vector dataset from the PostGIS database and stores it as a
    gpd.GeoDataFrame.

    Description
    ----------
    Connects to the PostGIS database and loads a vector dataset from there.
    The vector dataset must be imported to the PostGIS database in advance
    (this can be done by a user of the Department of Remote Sensing of the
    University Würzburg). The function calls the file by its filename in the
    PostGIS database. It is possible to define which columns should be
    included; the default is to load all existing columns.

    Parameters
    ----------
    filename: string
        The name of the vector dataset in the PostGIS database. If unsure,
        ask the staff member of the Department of Remote Sensing which
        imported your dataset to the PostGIS database.
    cols: list, string (default = "all")
        List with column names of the vector dataset, e.g.
        ["column1", "column2", "column3"], which should be loaded. If used,
        only the "real" existing columns need to be defined; the "geom" and
        "gid" columns (indexing of the PostGIS database) are always loaded.
        The default "all" loads all columns of the vector dataset.

    Returns
    -------
    gdf: gpd.GeoDataFrame
        Geodataframe from the PostGIS database with the defined (or all)
        columns.

    Raises
    ------
    ValueError
        If ``cols`` is neither a list nor the string "all" (the original
        code fell through to a NameError in that case).
    """
    # NOTE(review): hard-coded credentials in source code are a security
    # risk — move these to environment variables or a config file.
    user = 'dc_user'
    password = 'localuser1234'
    host = '127.0.0.1'
    port = '5432'
    database = 'datacube'

    # Validate before opening a connection so bad input cannot leak one.
    if not isinstance(cols, list) and cols != "all":
        raise ValueError('cols must be a list of column names or "all"')

    if isinstance(cols, list):
        # NOTE(review): table/column names are interpolated directly into the
        # SQL string; only call this with trusted identifiers (or switch to
        # psycopg2.sql.Identifier composition).
        sql = "select geom, gid, " + ", ".join(cols) + " from " + filename + ";"
    else:
        sql = "select * from " + filename + ";"

    connection = psycopg2.connect(database=database, user=user,
                                  password=password, host=host, port=port)
    try:
        # Build the GeoDataFrame from the query result.
        gdf = gpd.GeoDataFrame.from_postgis(sql, connection)
    finally:
        # Close the connection even if the query fails (the original leaked
        # it on error).
        connection.close()

    return gdf
|
Oops, something went wrong.