Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added units flag scatter #134

Merged
merged 10 commits into from
Oct 4, 2022
2 changes: 1 addition & 1 deletion .github/workflows/full_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest]
python-version: ["3.7", "3.8", "3.9", "3.10"]
python-version: ["3.8", "3.9", "3.10"]

steps:
- uses: actions/checkout@v2
Expand Down
63 changes: 28 additions & 35 deletions fmskill/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -839,7 +839,7 @@ def scatter(
df: pd.DataFrame = None,
binsize: float = None,
nbins: int = None,
skill_table: bool = False,
skill_table: Union[str, List[str], bool] = None,
**kwargs,
):
"""Scatter plot showing compared data: observation vs modelled
Expand Down Expand Up @@ -902,9 +902,10 @@ def scatter(
by default None
df : pd.dataframe, optional
show user-provided data instead of the comparers own data, by default None
skill_table : bool, optional
calculates the main skills (bias, rmse, si, r2, etc) and adds a box at
the right of the scatter plot, by default False
skill_table : str, List[str], bool, optional
list of fmskill.metrics or a boolean; if True, the default metrics
[bias, rmse, urmse, mae, cc, si, r2] are used.
This keyword adds a box with the skill values at the right of the scatter plot,
by default None
kwargs

Examples
Expand Down Expand Up @@ -952,6 +953,27 @@ def scatter(

if title is None:
title = f"{self.mod_names[mod_id]} vs {self.name}"

if skill_table != None:
# Calculate Skill if it was requested to add as table on the right of plot
if skill_table==True:
skill_df = self.skill(df=df,model=model,observation=observation,variable=variable)
elif type(skill_table)==list:
skill_df = self.skill(df=df,metrics=skill_table,model=model,observation=observation,variable=variable)
stats_with_units=["bias", "rmse", "urmse", "mae","max_error"]
# Check for units
try:
units=unit_text.split('[')[1].split(']')[0]
except:
# Dimensionless
units=''
if skill_table==False:
skill_df=None
units=None
else:
# skill_table is None
skill_df=None
units=None

scatter(
x=x,
Expand All @@ -969,41 +991,12 @@ def scatter(
title=title,
xlabel=xlabel,
ylabel=ylabel,
skill_df= skill_df,
units=units,
binsize=binsize,
nbins=nbins,
**kwargs,
)
if skill_table:
# Calculate Skill if it was requested to add as table on the right of plot
skill_df = self.skill(
metrics=["bias", "rmse", "urmse", "mae", "cc", "si", "r2"], df=df
) # df is filtered to matching subset
lines = []

max_str_len = skill_df.df.columns.str.len().max()

for col in skill_df.df.columns:
if col == "model":
continue
lines.append(
f"{col.ljust(max_str_len)} {np.round(skill_df.df[col].values[0],3)}"
)

text_ = "\n".join(lines)

plt.gcf().text(
0.97,
0.6,
text_,
bbox={
"facecolor": "blue",
"edgecolor": "k",
"boxstyle": "round",
"alpha": 0.05,
},
fontsize=12,
family="monospace",
)

def taylor(
self,
Expand Down
25 changes: 18 additions & 7 deletions fmskill/observation.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class Observation:
# matplotlib: red=#d62728

def __init__(
self, name: str = None, df=None, itemInfo=None, variable_name: str = None
self, name: str = None, df=None, itemInfo=None, variable_name: str = None, override_units: str = None,
):
self.color = "#d62728"

Expand All @@ -67,7 +67,7 @@ def __init__(
if variable_name is None:
variable_name = self.itemInfo.type.name
self.variable_name = variable_name

self.override_units= override_units
@property
def time(self) -> pd.DatetimeIndex:
"Time index"
Expand Down Expand Up @@ -99,12 +99,17 @@ def filename(self):
return self._filename

def _unit_text(self):
    """Return the label text for this observation, e.g. ``"Water Level [m]"``.

    Returns
    -------
    str
        An empty string when ``self.itemInfo`` is None. Otherwise the
        display name of the item type, followed by the unit in square
        brackets unless the item type is ``mikeio.EUMType.Undefined``.
        The unit is ``self.override_units`` when that is set, else the
        item's own unit passed through ``unit_display_name``.
    """
    if self.itemInfo is None:
        return ""

    txt = f"{self.itemInfo.type.display_name}"
    if self.itemInfo.type != mikeio.EUMType.Undefined:
        if self.override_units is None:
            # No user override: abbreviate the EUM unit name (meter -> m, ...).
            unit = unit_display_name(self.itemInfo.unit.display_name)
        else:
            # The user-supplied unit text is shown verbatim.
            unit = self.override_units
        # Append the unit exactly once.
        txt = f"{txt} [{unit}]"
    return txt

def hist(self, bins=100, title=None, color=None, **kwargs):
Expand Down Expand Up @@ -162,6 +167,8 @@ class PointObservation(Observation):
user-defined name for easy identification in plots etc, by default file basename
variable_name : str, optional
user-defined variable name in case of multiple variables, by default eumType name
units : str, optional
user-defined units name in case user wants to override eumUnits

Examples
--------
Expand Down Expand Up @@ -191,6 +198,7 @@ def __init__(
z: float = None,
name: str = None,
variable_name: str = None,
units: str = None,
):

self.x = x
Expand Down Expand Up @@ -254,7 +262,7 @@ def __init__(
)

super().__init__(
name=name, df=df, itemInfo=itemInfo, variable_name=variable_name
name=name, df=df, itemInfo=itemInfo, variable_name=variable_name, override_units=units,
)

def __repr__(self):
Expand Down Expand Up @@ -333,6 +341,8 @@ class TrackObservation(Observation):
item name or index of y-coordinate, by default 1
offset_duplicates : float, optional
in case of duplicate timestamps, add this many seconds to consecutive duplicate entries, by default 0.001
units : str, optional
user-defined units name in case user wants to override eumUnits


Examples
Expand Down Expand Up @@ -408,6 +418,7 @@ def __init__(
x_item=0,
y_item=1,
offset_duplicates: float = 0.001,
units: str = None,
):

self._filename = None
Expand Down Expand Up @@ -446,7 +457,7 @@ def __init__(
df.index = make_unique_index(df.index, offset_duplicates=offset_duplicates)

super().__init__(
name=name, df=df, itemInfo=itemInfo, variable_name=variable_name
name=name, df=df, itemInfo=itemInfo, variable_name=variable_name,override_units=units,
)

@staticmethod
Expand Down Expand Up @@ -494,6 +505,6 @@ def unit_display_name(name: str) -> str:
m
"""

res = name.replace("meter", "m").replace("_per_", "/").replace("sec", "s")
res = name.replace("meter", "m").replace("_per_", "/").replace("second", "s").replace("sec", "s")

return res
72 changes: 71 additions & 1 deletion fmskill/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ def scatter(
title: str = "",
xlabel: str = "",
ylabel: str = "",
skill_df: object = None,
units: str = "",
binsize: float = None,
nbins: int = None,
**kwargs,
Expand Down Expand Up @@ -82,6 +84,10 @@ def scatter(
x-label text on plot, by default None
ylabel : str, optional
y-label text on plot, by default None
skill_df : dataframe, optional
dataframe with skill (stats) results to be added to plot, by default None
units : str, optional
user-defined units text (overriding the default units), e.g. 'metre',
shown next to the dimensional statistics in the skill table, by default ""
kwargs
"""
if show_hist==None and show_density==None:
Expand Down Expand Up @@ -268,10 +274,18 @@ def scatter(
plt.grid(
which="both", axis="both", linestyle=":", linewidth="0.2", color="grey"
)
max_cbar=None
if (show_hist or (show_density and show_points)):
cbar = plt.colorbar(fraction=0.046, pad=0.04)
cbar.set_label("# points")
ticks = cbar.ax.get_yticks()
max_cbar=ticks[-1]
cbar.set_label("# points")

plt.title(title)
# Add skill table
if skill_df != None:
_plot_summary_table(skill_df,units,max_cbar=max_cbar)


elif backend == "plotly": # pragma: no cover
import plotly.graph_objects as go
Expand Down Expand Up @@ -496,3 +510,59 @@ def _scatter_density(x, y, binsize: float = 0.1, method: str = "linear"):
Z_grid[(Z_grid < 0)] = 0

return Z_grid

def _plot_summary_table(skill_df, units, max_cbar):
    """Write a one-row skill table as a text box on the current figure.

    Parameters
    ----------
    skill_df : dataframe-like
        Skill results; must expose ``columns``, ``len()`` and
        ``skill_df[col].values``. Only the first row is shown, so it may
        hold at most one row (one model vs. one observation).
    units : str
        Unit text appended to the dimensional statistics
        (bias, rmse, urmse, mae). ``None`` is treated as ``""``.
    max_cbar : float or None
        Largest colorbar tick value, or None when the plot has no colorbar.
        Used only to choose the horizontal position of the text box.

    Raises
    ------
    ValueError
        If ``skill_df`` holds more than one row.
    """
    # Validate before touching the data so a wrong call fails fast.
    if len(skill_df) > 1:
        raise ValueError(
            "`skill_table` kword can only be used for comparisons between 1 model and 1 measurement.\n"
            "Please add `model`, `variable` and `observation` kwords where required"
        )

    if units is None:
        # Avoid printing the literal text "None" after each statistic.
        units = ""

    stats_with_units = {"bias", "rmse", "urmse", "mae"}
    skipped_columns = {"model", "variable"}
    max_str_len = skill_df.columns.str.len().max()

    lines = []
    for col in skill_df.columns:
        if col in skipped_columns:
            continue
        # Dimensional statistics get the unit; the rest get a blank (for formatting).
        item_unit = units if col in stats_with_units else " "
        # Number of samples is shown as an integer, everything else with 2 decimals.
        decimals = ".0f" if col == "n" else ".2f"
        value = np.round(skill_df[col].values[0], 2)
        lines.append(
            f"{col.ljust(max_str_len).upper()} {value:{decimals}} {item_unit}"
        )

    text_ = "\n".join(lines)

    # Shift the box further right as the colorbar tick labels get wider.
    if max_cbar is None:
        x = 0.93
    elif max_cbar < 1e3:
        x = 0.99
    elif max_cbar < 1e4:
        x = 1.01
    elif max_cbar < 1e5:
        x = 1.03
    elif max_cbar < 1e6:
        x = 1.05
    else:
        # When more than 1e6 samples, matplotlib changes to scientific notation
        x = 0.97

    plt.gcf().text(
        x,
        0.6,
        text_,
        bbox={
            "facecolor": "blue",
            "edgecolor": "k",
            "boxstyle": "round",
            "alpha": 0.05,
        },
        fontsize=12,
        family="monospace",
    )
1 change: 1 addition & 0 deletions tests/test_eum.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ def test_units_display_name():

assert unit_display_name("meter") == "m"
assert unit_display_name("meter_per_sec") == "m/s"
assert unit_display_name("second") == "s"
1 change: 1 addition & 0 deletions tests/test_multimodelcompare.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ def test_mm_scatter(cc):
cc.scatter(model="SW_2", show_points=0.75)
cc.scatter(model="SW_2", show_density=True)
cc.scatter(model="SW_2", show_points=0.75, show_density=True)
cc.scatter(model="SW_2", observation='HKNA',skill_table=True)
# cc.scatter(model="SW_2", binsize=0.5, backend="plotly")
assert True
plt.close("all")
Expand Down
1 change: 1 addition & 0 deletions tests/test_multivariable_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ def test_mv_mm_mean_skill(cc):
def test_mv_mm_scatter(cc):
cc.scatter(model="SW_1", variable="Wind_speed")
cc.scatter(model="SW_1", variable="Wind_speed", show_density=True)
cc.scatter(model="SW_1", variable="Wind_speed", observation = 'F16_wind', skill_table=True)
assert True
plt.close("all")

Expand Down
20 changes: 20 additions & 0 deletions tests/test_pointobservation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pandas as pd
import pytest
import sys
import mikeio

from fmskill.observation import PointObservation

Expand All @@ -17,6 +18,18 @@ def test_from_dfs0(klagshamn):
o2 = PointObservation(klagshamn, item="Water Level", x=366844, y=6154291)
assert o1.n_points == o2.n_points

o3 = PointObservation(klagshamn, item="Water Level", x=366844, y=6154291, units='meter')
assert o3.override_units == o2.itemInfo.unit.name

o4 = PointObservation(klagshamn, item="Water Level", x=366844, y=6154291)
assert o4.override_units == None

o5 = PointObservation(klagshamn, item="Water Level", x=366844, y=6154291)
assert o5._unit_text() == 'Water Level [m]'

o6 = PointObservation(klagshamn, item="Water Level", x=366844, y=6154291, units='inches')
assert o6._unit_text() == "Water Level [inches]"


def test_from_df(klagshamn):
o1 = PointObservation(klagshamn, item=0, x=366844, y=6154291, name="Klagshamn1")
Expand All @@ -35,6 +48,13 @@ def test_from_df(klagshamn):
o3 = PointObservation(s, x=366844, y=6154291, name="Klagshamn3")
assert o1.n_points == o3.n_points

o4 = PointObservation(df, item="Water Level", x=366844, y=6154291, units='metre')
assert o4.override_units == 'metre'

o5 = PointObservation(df, item="Water Level", x=366844, y=6154291, units='inches')
o5.itemInfo = mikeio.ItemInfo(mikeio.EUMType.Water_Level)
assert o5._unit_text() == "Water Level [inches]"


@pytest.mark.skipif("shapely" not in sys.modules, reason="requires the shapely")
def test_coordinates(klagshamn):
Expand Down
5 changes: 5 additions & 0 deletions tests/test_trackobservation.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ def test_read(c2):
assert len(o1.x) == o1.n_points
assert o1.name == "c2"
assert pytest.approx(o1.values.max()) == 17.67
assert o1.override_units == None
o2 = TrackObservation(c2, item=2, name="c2",units='inches/hour')
assert o2.override_units == 'inches/hour'
assert o2._unit_text() == "Wind speed [inches/hour]"



def test_from_df():
Expand Down