Commit 95c4930

Merge branch 'station-summary' into 'main'
REFACTOR: Merge duplicated code in StationSummary and compute_station_metrics.

Closes usgs#1017

See merge request ghsc/esi/groundmotion-processing!1111
emthompson-usgs committed Nov 17, 2022
2 parents f9d9d3c + 08726d0 commit 95c4930
Showing 23 changed files with 514 additions and 470 deletions.
407 changes: 263 additions & 144 deletions src/gmprocess/metrics/station_summary.py

Large diffs are not rendered by default.
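Since the station_summary.py side of this diff is not rendered, the sketch below reconstructs what the consolidated `get_ps2ff_interpolation` helper plausibly looks like, pieced together from the `_get_ps2ff_splines` body deleted from compute_station_metrics.py further down. This is a hedged reconstruction, not the actual file contents: the module-level signature and the `event.magnitude`/`event.depth_km` attribute names are assumptions.

```python
# Hedged reconstruction -- NOT the actual station_summary.py contents.
# The body is lifted from the _get_ps2ff_splines method deleted below;
# the event attribute names (magnitude, depth_km) are assumptions.
import numpy as np
import scipy.interpolate as spint
import ps2ff.constants
import ps2ff.run


def get_ps2ff_interpolation(event):
    """Build point-source distance-adjustment interpolators.

    Returns (rrup_interp, rjb_interp); each maps epicentral distance (km)
    to a stacked (mean, variance) pair.
    """
    # TODO: Make these options configurable in config file.
    mscale = ps2ff.constants.MagScaling.WC94
    smech = ps2ff.constants.Mechanism.A
    aspect = 1.7
    mindip = 10.0 * np.pi / 180.0
    maxdip = 90.0 * np.pi / 180.0
    repi, Rjb_hat, Rrup_hat, Rjb_var, Rrup_var = ps2ff.run.single_event_adjustment(
        event.magnitude,  # assumed attribute; the old code used origin.mag
        event.depth_km,   # assumed attribute; the old code used origin.depth
        ar=aspect,
        mechanism=smech,
        mag_scaling=mscale,
        n_repi=30,
        min_repi=0.1,
        max_repi=2000,
        nxny=7,
        n_theta=19,
        n_dip=4,
        min_dip=mindip,
        max_dip=maxdip,
        n_eps=5,
        trunc=2,
    )
    # Stack mean and variance so one evaluation returns both.
    rjb_interp = spint.interp1d(
        repi,
        np.vstack((Rjb_hat, Rjb_var)),
        kind="linear",
        copy=False,
        assume_sorted=True,
    )
    rrup_interp = spint.interp1d(
        repi,
        np.vstack((Rrup_hat, Rrup_var)),
        kind="linear",
        copy=False,
        assume_sorted=True,
    )
    return rrup_interp, rjb_interp
```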

143 changes: 10 additions & 133 deletions src/gmprocess/subcommands/compute_station_metrics.py
@@ -5,13 +5,12 @@
import pathlib

from gmprocess.subcommands.lazy_loader import LazyLoader
+ from gmprocess.metrics.station_summary import get_ps2ff_interpolation

np = LazyLoader("np", globals(), "numpy")
- spint = LazyLoader("spint", globals(), "scipy.interpolate")
ob = LazyLoader("ob", globals(), "obspy.geodetics.base")
oqgeo = LazyLoader("oqgeo", globals(), "openquake.hazardlib.geo.geodetic")
rupt = LazyLoader("rupt", globals(), "esi_utils_rupture")
- ps2ff = LazyLoader("ps2ff", globals(), "ps2ff")

arg_dicts = LazyLoader("arg_dicts", globals(), "gmprocess.subcommands.arg_dicts")
base = LazyLoader("base", globals(), "gmprocess.subcommands.base")
@@ -97,9 +96,10 @@ def _event_station_metrics(self, event):
rupture_file = str(rupture_file)
rupture = rupt.factory.get_rupture(origin, rupture_file)
if isinstance(rupture, rupt.point_rupture.PointRupture):
- self._get_ps2ff_splines()
-
- self._get_labels()
+ self._get_ps2ff(event)
+ else:
+ self.rrup_interp = None
+ self.rjb_interp = None

for station_id in station_list:
streams = self.workspace.getStreams(
@@ -116,102 +116,17 @@ def _event_station_metrics(self, event)
continue

for st in streams:
- geo_tuple = ob.gps2dist_azimuth(
- st[0].stats.coordinates.latitude,
- st[0].stats.coordinates.longitude,
- origin.lat,
- origin.lon,
- )
- sta_repi = geo_tuple[0] / M_PER_KM
- sta_baz = geo_tuple[1]
- sta_rhyp = oqgeo.distance(
- st[0].stats.coordinates.longitude,
- st[0].stats.coordinates.latitude,
- -st[0].stats.coordinates.elevation / M_PER_KM,
- origin.lon,
- origin.lat,
- origin.depth,
- )
-
- if isinstance(rupture, rupt.point_rupture.PointRupture):
- rjb_hat = self.rjb_spline(sta_repi)
- rjb_mean = rjb_hat[0]
- rjb_var = rjb_hat[1]
- rrup_hat = self.rrup_spline(sta_repi)
- rrup_mean = rrup_hat[0]
- rrup_var = rrup_hat[1]
- gc2_rx = np.full_like(rjb_mean, np.nan)
- gc2_ry = np.full_like(rjb_mean, np.nan)
- gc2_ry0 = np.full_like(rjb_mean, np.nan)
- gc2_U = np.full_like(rjb_mean, np.nan)
- gc2_T = np.full_like(rjb_mean, np.nan)
- else:
- rrup_mean, rrup_var = rupture.computeRrup(
- np.array([st[0].stats.coordinates.longitude]),
- np.array([st[0].stats.coordinates.latitude]),
- utils.constants.ELEVATION_FOR_DISTANCE_CALCS,
- )
- rjb_mean, rjb_var = rupture.computeRjb(
- np.array([st[0].stats.coordinates.longitude]),
- np.array([st[0].stats.coordinates.latitude]),
- utils.constants.ELEVATION_FOR_DISTANCE_CALCS,
- )
- rrup_var = np.full_like(rrup_mean, np.nan)
- rjb_var = np.full_like(rjb_mean, np.nan)
- gc2_dict = rupture.computeGC2(
- np.array([st[0].stats.coordinates.longitude]),
- np.array([st[0].stats.coordinates.latitude]),
- utils.constants.ELEVATION_FOR_DISTANCE_CALCS,
- )
- gc2_rx = gc2_dict["rx"]
- gc2_ry = gc2_dict["ry"]
- gc2_ry0 = gc2_dict["ry0"]
- gc2_U = gc2_dict["U"]
- gc2_T = gc2_dict["T"]
-
- # If we don't have a point rupture, then back azimuth needs
- # to be calculated to the closest point on the rupture
- dists = []
- bazs = []
- for quad in rupture._quadrilaterals:
- P0, P1, P2, P3 = quad
- for point in [P0, P1]:
- dist, az, baz = ob.gps2dist_azimuth(
- point.y,
- point.x,
- st[0].stats.coordinates.latitude,
- st[0].stats.coordinates.longitude,
- )
- dists.append(dist)
- bazs.append(baz)
- sta_baz = bazs[np.argmin(dists)]

streamid = st.get_id()
logging.info(f"Calculating station metrics for {streamid}...")
summary = station_summary.StationSummary.from_config(
st,
event=event,
config=config,
calc_waveform_metrics=False,
- calc_station_metrics=False,
+ calc_station_metrics=True,
+ rupture=rupture,
+ rrup_interp=self.rrup_interp,
+ rjb_interp=self.rjb_interp,
)

- summary._distances = {
- "epicentral": sta_repi,
- "hypocentral": sta_rhyp,
- "rupture": rrup_mean,
- "rupture_var": rrup_var,
- "joyner_boore": rjb_mean,
- "joyner_boore_var": rjb_var,
- "gc2_rx": gc2_rx,
- "gc2_ry": gc2_ry,
- "gc2_ry0": gc2_ry0,
- "gc2_U": gc2_U,
- "gc2_T": gc2_T,
- }
- summary._back_azimuth = sta_baz

xmlstr = summary.get_station_xml()
if config["read"]["use_streamcollection"]:
chancode = st.get_inst()
@@ -237,43 +152,5 @@ def _event_station_metrics(self, event):
self.workspace.close()
return event.id

- def _get_ps2ff_splines(self):
- # TODO: Make these options configurable in config file.
- mscale = ps2ff.constants.MagScaling.WC94
- smech = ps2ff.constants.Mechanism.A
- aspect = 1.7
- mindip_deg = 10.0
- maxdip_deg = 90.0
- mindip = mindip_deg * np.pi / 180.0
- maxdip = maxdip_deg * np.pi / 180.0
- repi, Rjb_hat, Rrup_hat, Rjb_var, Rrup_var = ps2ff.run.single_event_adjustment(
- self.origin.mag,
- self.origin.depth,
- ar=aspect,
- mechanism=smech,
- mag_scaling=mscale,
- n_repi=30,
- min_repi=0.1,
- max_repi=2000,
- nxny=7,
- n_theta=19,
- n_dip=4,
- min_dip=mindip,
- max_dip=maxdip,
- n_eps=5,
- trunc=2,
- )
- self.rjb_spline = spint.interp1d(
- repi,
- np.vstack((Rjb_hat, Rjb_var)),
- kind="linear",
- copy=False,
- assume_sorted=True,
- )
- self.rrup_spline = spint.interp1d(
- repi,
- np.vstack((Rrup_hat, Rrup_var)),
- kind="linear",
- copy=False,
- assume_sorted=True,
- )
+ def _get_ps2ff(self, event):
+ self.rrup_interp, self.rjb_interp = get_ps2ff_interpolation(event)
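For reference, the interpolators built above are evaluated at an epicentral distance and unpacked into a (mean, variance) pair, exactly as the deleted point-rupture branch did with `self.rjb_spline(sta_repi)`. A minimal runnable sketch of that consumption pattern, with made-up numbers standing in for the ps2ff output:

```python
import numpy as np
import scipy.interpolate as spint

# Toy stand-in for the ps2ff output; the real arrays come from
# ps2ff.run.single_event_adjustment as shown in the deleted code above.
repi = np.array([0.1, 10.0, 100.0])     # epicentral distances (km)
rjb_hat = np.array([0.05, 7.0, 95.0])   # fake adjusted-Rjb means
rjb_var = np.array([1.0, 4.0, 9.0])     # fake variances

# Stacking mean and variance lets one evaluation return both rows,
# mirroring the np.vstack call in the deleted _get_ps2ff_splines.
rjb_interp = spint.interp1d(
    repi,
    np.vstack((rjb_hat, rjb_var)),
    kind="linear",
    copy=False,
    assume_sorted=True,
)

rjb_mean, rjb_variance = rjb_interp(5.0)  # evaluate at repi = 5 km
print(rjb_mean, rjb_variance)
```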
2 changes: 1 addition & 1 deletion tests/gmprocess/io/asdf/stream_workspace_test.py
@@ -86,7 +86,7 @@ def test_stream_workspace():
stream1 = raw_streams[0]
# Get metrics from station summary for raw streams
config["metrics"]["sa"]["periods"]["defined_periods"] = [0.3, 1.0, 3.0]
- summary1 = StationSummary.from_config(stream1, config=config)
+ summary1 = StationSummary.from_config(stream1, config=config, event=event)
s1_df_in = summary1.pgms.sort_values(["IMT", "IMC"])
array1 = s1_df_in["Result"].to_numpy()

126 changes: 10 additions & 116 deletions tests/gmprocess/io/stream_test.py
@@ -28,9 +28,7 @@ def test():
assert grouped_streams[0].count() == 3

# Test for channel grouping with more file types
- datafiles, _ = read_data_dir(
- "geonet", "us1000778i", "20161113_110313_THZ_20.V2A"
- )
+ datafiles, _ = read_data_dir("geonet", "us1000778i", "20161113_110313_THZ_20.V2A")
datafile = datafiles[0]
streams += read_geonet(datafile)
grouped_streams = StreamCollection(streams)
@@ -148,14 +146,23 @@ def test_to_dataframe():
[
"ELEVATION",
"EPICENTRAL_DISTANCE",
"GC2_RX_DISTANCE",
"GC2_RY0_DISTANCE",
"GC2_RY_DISTANCE",
"GC2_T_DISTANCE",
"GC2_U_DISTANCE",
"GREATER_OF_TWO_HORIZONTALS",
"H1",
"H2",
"HYPOCENTRAL_DISTANCE",
"JOYNER_BOORE_DISTANCE",
"JOYNER_BOORE_VAR_DISTANCE",
"LAT",
"LON",
"NAME",
"NETID",
"RUPTURE_DISTANCE",
"RUPTURE_VAR_DISTANCE",
"SOURCE",
"STATION",
"Z",
@@ -166,119 +173,6 @@
header2 = set(df1.columns.levels[1])
assert header1 == cmp1
assert header2 == cmp2
- # idx = 0
- # for s in df1.columns.levels:
- # for col in s:
- # try:
- # assert col == target_levels[idx]
- # except Exception as e:
- # x = 1
- # idx += 1
-
- # This was previously not being tested
- """imts = ['PGA', 'PGV', 'SA(0.3)', 'SA(1.0)', 'SA(3.0)']
- imcs = ['GREATER_OF_TWO_HORIZONTALS', 'CHANNELS']
- homedir = os.path.dirname(os.path.abspath(__file__))
- datapath = os.path.join('data', 'testdata', 'knet')
- knet_dir = pkg_resources.resource_filename('gmprocess', datapath)
- # make dataframe
- knet_dataframe = directory_to_dataframe(knet_dir)
- # read and group streams
- streams = []
- for filepath in glob.glob(os.path.join(knet_dir, "*")):
- streams += read_data(filepath)
- grouped_streams = StreamCollection(streams)
- for idx, stream in enumerate(grouped_streams):
- stream = process_streams(stream)
- # set meta_data
- station = stream[0].stats['station']
- name_str = stream[0].stats['standard']['station_name']
- source = stream[0].stats.standard['source']
- network = stream[0].stats['network']
- latitude = stream[0].stats['coordinates']['latitude']
- longitude = stream[0].stats['coordinates']['longitude']
- # metadata from the dataframe
- knet_station = knet_dataframe.iloc[
- idx, knet_dataframe.columns.get_level_values(0) == 'STATION'][0]
- knet_name_str = knet_dataframe.iloc[
- idx, knet_dataframe.columns.get_level_values(0) == 'NAME'][0]
- knet_source = knet_dataframe.iloc[
- idx, knet_dataframe.columns.get_level_values(0) == 'SOURCE'][0]
- knet_network = knet_dataframe.iloc[
- idx, knet_dataframe.columns.get_level_values(0) == 'NETID'][0]
- knet_latitude = knet_dataframe.iloc[
- idx, knet_dataframe.columns.get_level_values(0) == 'LAT'][0]
- knet_longitude = knet_dataframe.iloc[
- idx, knet_dataframe.columns.get_level_values(0) == 'LON'][0]
- assert knet_station == station
- assert knet_name_str == name_str
- assert knet_source == source
- assert knet_network == network
- assert knet_latitude == latitude
- assert knet_longitude == longitude
- stream_summary = StationSummary.from_stream(stream, imcs, imts)
- pgms = stream_summary.pgms
- for imt in pgms:
- for imc in pgms[imt]:
- multi_idx = np.logical_and(
- knet_dataframe.columns.get_level_values(1) == imt,
- knet_dataframe.columns.get_level_values(0) == imc)
- dataframe_value = knet_dataframe.iloc[idx, multi_idx].to_list()[
- 0]
- streamsummary_value = pgms[imt][imc]
- assert dataframe_value == streamsummary_value
- datapath = os.path.join('data', 'testdata', 'cwb')
- cwb_dir = pkg_resources.resource_filename('gmprocess', datapath)
- # make dataframe
- cwb_dataframe = directory_to_dataframe(cwb_dir, lat=24.14, lon=121)
- # read and group streams
- streams = []
- for filepath in glob.glob(os.path.join(cwb_dir, "*")):
- streams += read_data(filepath)
- grouped_streams = StreamCollection(streams)
- for idx, stream in enumerate(grouped_streams):
- stream = process_streams(stream)
- # set meta_data
- station = stream[0].stats['station']
- name_str = stream[0].stats['standard']['station_name']
- source = stream[0].stats.standard['source']
- network = stream[0].stats['network']
- latitude = stream[0].stats['coordinates']['latitude']
- longitude = stream[0].stats['coordinates']['longitude']
- # metadata from the dataframe
- cwb_station = cwb_dataframe.iloc[
- idx, cwb_dataframe.columns.get_level_values(0) == 'STATION'][0]
- cwb_name_str = cwb_dataframe.iloc[
- idx, cwb_dataframe.columns.get_level_values(0) == 'NAME'][0]
- cwb_source = cwb_dataframe.iloc[
- idx, cwb_dataframe.columns.get_level_values(0) == 'SOURCE'][0]
- cwb_network = cwb_dataframe.iloc[
- idx, cwb_dataframe.columns.get_level_values(0) == 'NETID'][0]
- cwb_latitude = cwb_dataframe.iloc[
- idx, cwb_dataframe.columns.get_level_values(0) == 'LAT'][0]
- cwb_longitude = cwb_dataframe.iloc[
- idx, cwb_dataframe.columns.get_level_values(0) == 'LON'][0]
- assert cwb_station == station
- assert cwb_name_str == name_str
- assert cwb_source == source
- assert cwb_network == network
- assert cwb_latitude == latitude
- assert cwb_longitude == longitude
- stream_summary = StationSummary.from_stream(stream, imcs, imts)
- pgms = stream_summary.pgms
- for imt in pgms:
- for imc in pgms[imt]:
- multi_idx = np.logical_and(
- cwb_dataframe.columns.get_level_values(1) == imt,
- cwb_dataframe.columns.get_level_values(0) == imc)
- dataframe_value = cwb_dataframe.iloc[idx, multi_idx].to_list()[
- 0]
- streamsummary_value = pgms[imt][imc]
- assert dataframe_value == streamsummary_value"""


if __name__ == "__main__":
21 changes: 7 additions & 14 deletions tests/gmprocess/metrics/high_freq_test.py
@@ -12,6 +12,7 @@
from gmprocess.core.stationtrace import StationTrace
from gmprocess.metrics.station_summary import StationSummary
from gmprocess.utils.constants import TEST_DATA_DIR
+ from gmprocess.utils.event import ScalarEvent


def read_at2(dfile, horient=0.0):
@@ -84,6 +85,11 @@ def read_at2(dfile, horient=0.0):


def test_high_freq_sa():
+ # Dummy event
+ event = ScalarEvent()
+ event.fromParams(
+ id="", time="2001-01-01 00:00:00", lat=0, lon=0, depth=0, magnitude=0
+ )
t1 = time.time()
datadir = TEST_DATA_DIR / "high_freq_sa"
fnames = [
@@ -98,12 +104,11 @@

# shorten window to speed up tests
for tr in st:
- # tr.data = tr.data[3000:12000]
tr.data = tr.data[5320:9260]

periods = [0.01, 0.02, 0.03, 0.05, 0.075, 0.1, 0.15, 0.2]
imt_list = [f"sa{p}" for p in periods]
- station = StationSummary.from_stream(st, ["rotd50"], imt_list)
+ station = StationSummary.from_stream(st, ["rotd50"], imt_list, event=event)
# I believe that units are %g in the following table:
pgmdf = station.pgms
imt_strs = [f"SA({p:.3f})" for p in periods]
@@ -127,18 +132,6 @@
"test_sa": np.array(test_sa) / 100,
}

- # For visualization... not testing:
- if False:
- import matplotlib.pyplot as plt
-
- # %matplotlib osx
-
- plt.loglog(test_data["periods"], test_data["target_sa"], "o-", label="PEER")
- plt.loglog(test_data["periods"], test_data["test_sa"], "o-", label="gmprocess")
- plt.xlabel("Period, sec")
- plt.ylabel("PSA, g")
- plt.legend()
-
np.testing.assert_allclose(test_data["target_sa"], test_data["test_sa"], rtol=0.1)
t2 = time.time()
elapsed = t2 - t1
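The common thread in the test changes is that StationSummary now requires an event, even when only waveform metrics are requested. A minimal sketch of the resulting dummy-event pattern, using the ScalarEvent API exactly as it appears in the high_freq_test diff (the corrected timestamp string is an assumption):

```python
from gmprocess.utils.event import ScalarEvent

# Dummy event with placeholder source parameters; fromParams is the
# constructor used by the updated test above.
event = ScalarEvent()
event.fromParams(
    id="", time="2001-01-01 00:00:00", lat=0, lon=0, depth=0, magnitude=0
)

# `event` can then be passed to StationSummary.from_stream(..., event=event)
# or StationSummary.from_config(..., event=event).
```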
(The remaining 18 changed files are not shown.)
