From 533e2a242d5299cdea32f40dc21ea7576a51899d Mon Sep 17 00:00:00 2001 From: pciturri Date: Tue, 16 May 2023 22:15:11 +0200 Subject: [PATCH] fix: reworked catalog accessor. Retrieving from IRIS ("https://service.iris.edu/fdsnws/event/1/") in text file, which is faster and safer. tests: fixed event list location --- csep/__init__.py | 51 +++----- csep/utils/iris.py | 132 ++++++++++++++++++++ csep/utils/readers.py | 167 ++++++-------------------- tests/artifacts/gCMT/vcr_search.yaml | 43 +++++++ tests/artifacts/gCMT/vcr_summary.yaml | 43 +++++++ tests/test_gcmt.py | 28 ++--- 6 files changed, 286 insertions(+), 178 deletions(-) create mode 100644 csep/utils/iris.py create mode 100644 tests/artifacts/gCMT/vcr_search.yaml create mode 100644 tests/artifacts/gCMT/vcr_summary.yaml diff --git a/csep/__init__.py b/csep/__init__.py index e3056a33..2ce613af 100644 --- a/csep/__init__.py +++ b/csep/__init__.py @@ -311,10 +311,11 @@ def query_bsi(start_time, end_time, min_magnitude=2.50, return bsi + def query_gns(start_time, end_time, min_magnitude=2.950, - min_latitude=-47, max_latitude=-34, - min_longitude=164, max_longitude=180, - max_depth=45.5, + min_latitude=-47, max_latitude=-34, + min_longitude=164, max_longitude=180, + max_depth=45.5, verbose=True, apply_filters=False, **kwargs): """ @@ -360,38 +361,26 @@ def query_gns(start_time, end_time, min_magnitude=2.950, return gns -def query_gcmt(start_time, end_time, min_magnitude=5.0, min_depth=None, + +def query_gcmt(start_time, end_time, min_magnitude=5.0, max_depth=None, - catalog_id=None, verbose=True, + catalog_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None): - if min_latitude: - searchshape = 'RECT' - else: - searchshape = 'GLOBAL' - events, creation_time = readers._query_gcmt( - start_year=start_time.year, - start_month=start_time.month, - start_day=start_time.day, - searchshape=searchshape, - start_time=start_time.time().isoformat(), - end_year=end_time.year, - end_month=end_time.month, - end_day=end_time.day, - end_time=end_time.time().isoformat(), - min_mag=min_magnitude, - min_dep=min_depth, - max_dep=max_depth, - left_lon=min_longitude, - right_lon=max_longitude, - bot_lat=min_latitude, - top_lat=max_latitude, - verbose=verbose - ) - catalog = catalogs.CSEPCatalog(data=events, name='ISC Bulletin - gCMT', + + eventlist = readers._query_gcmt(start_time=start_time, + end_time=end_time, + min_magnitude=min_magnitude, + min_latitude=min_latitude, + max_latitude=max_latitude, + min_longitude=min_longitude, + max_longitude=max_longitude, + max_depth=max_depth) + + catalog = catalogs.CSEPCatalog(data=eventlist, + name='gCMT', catalog_id=catalog_id, - date_accessed=creation_time) - catalog.filter([f'magnitude >= {min_magnitude}'], in_place=True) + date_accessed=utc_now_datetime()) return catalog diff --git a/csep/utils/iris.py b/csep/utils/iris.py new file mode 100644 index 00000000..b4a6e2a9 --- /dev/null +++ b/csep/utils/iris.py @@ -0,0 +1,132 @@ +# python imports +from datetime import datetime +from urllib import request +from urllib.parse import urlencode + +# PyCSEP imports +from csep.utils.time_utils import datetime_to_utc_epoch + +HOST_CATALOG = "https://service.iris.edu/fdsnws/event/1/query?" +TIMEOUT = 180 + + +def gcmt_search(format='text', + starttime=None, + endtime=None, + updatedafter=None, + minlatitude=None, + maxlatitude=None, + minlongitude=None, + maxlongitude=None, + latitude=None, + longitude=None, + maxradius=None, + catalog='GCMT', + contributor=None, + maxdepth=1000, + maxmagnitude=10.0, + mindepth=-100, + minmagnitude=0, + offset=1, + orderby='time-asc', + host=None, + verbose=False): + """Search the IRIS database for events matching input criteria. + This search function is a wrapper around the ComCat Web API described here: + https://service.iris.edu/fdsnws/event/1/ + + This function returns a list of SummaryEvent objects, described elsewhere in this package. + Args: + starttime (datetime): + Python datetime - Limit to events on or after the specified start time. + endtime (datetime): + Python datetime - Limit to events on or before the specified end time. + updatedafter (datetime): + Python datetime - Limit to events updated after the specified time. + minlatitude (float): + Limit to events with a latitude larger than the specified minimum. + maxlatitude (float): + Limit to events with a latitude smaller than the specified maximum. + minlongitude (float): + Limit to events with a longitude larger than the specified minimum. + maxlongitude (float): + Limit to events with a longitude smaller than the specified maximum. + latitude (float): + Specify the latitude to be used for a radius search. + longitude (float): + Specify the longitude to be used for a radius search. + maxradius (float): + Limit to events within the specified maximum number of degrees + from the geographic point defined by the latitude and longitude parameters. + catalog (str): + Limit to events from a specified catalog. + contributor (str): + Limit to events contributed by a specified contributor. + maxdepth (float): + Limit to events with depth less than the specified maximum. + maxmagnitude (float): + Limit to events with a magnitude smaller than the specified maximum. + mindepth (float): + Limit to events with depth more than the specified minimum. + minmagnitude (float): + Limit to events with a magnitude larger than the specified minimum. + offset (int): + Return results starting at the event count specified, starting at 1. + orderby (str): + Order the results. The allowed values are: + - time order by origin descending time + - time-asc order by origin ascending time + - magnitude order by descending magnitude + - magnitude-asc order by ascending magnitude + host (str): + Replace default ComCat host (earthquake.usgs.gov) with a custom host. + Returns: + list: List of SummaryEvent() objects. + """ + + # getting the inputargs must be the first line of the method! + inputargs = locals().copy() + newargs = {} + + for key, value in inputargs.items(): + if value is True: + newargs[key] = 'true' + continue + if value is False: + newargs[key] = 'false' + continue + if value is None: + continue + newargs[key] = value + + del newargs['verbose'] + + events = _search_gcmt(**newargs) + + return events + + +def _search_gcmt(**_newargs): + """ + Performs de-query at ISC API and returns event list and access date + + """ + paramstr = urlencode(_newargs) + url = HOST_CATALOG + paramstr + fh = request.urlopen(url, timeout=TIMEOUT) + data = fh.read().decode('utf8').split('\n') + fh.close() + eventlist = [] + for line in data[1:]: + line_ = line.split('|') + if len(line_) != 1: + id_ = line_[0] + time_ = datetime.fromisoformat(line_[1]) + dt = datetime_to_utc_epoch(time_) + lat = float(line_[2]) + lon = float(line_[3]) + depth = float(line_[4]) + mag = float(line_[10]) + eventlist.append((id_, dt, lat, lon, depth, mag)) + + return eventlist \ No newline at end of file diff --git a/csep/utils/readers.py b/csep/utils/readers.py index 920ff2fa..37098574 100644 --- a/csep/utils/readers.py +++ b/csep/utils/readers.py @@ -6,10 +6,6 @@ import csv from itertools import zip_longest import os -import time -from urllib.parse import urlencode -from urllib import request as request_ -import xml.etree.ElementTree as ElementTree # Third-party imports import numpy @@ -19,6 +15,7 @@ strptime_to_utc_epoch, datetime_to_utc_epoch from csep.utils.comcat import search from csep.utils.geonet import gns_search +from csep.utils.iris import gcmt_search from csep.core.regions import QuadtreeGrid2D from csep.core.exceptions import CSEPIOException @@ -572,6 +569,7 @@ def is_header_line(line): return out + def ingv_horus(fname): """ Reader for the INGV (Istituto Nazionale di Geofisica e Vulcanologia - Italy) @@ -702,6 +700,7 @@ def _query_comcat(start_time, end_time, min_magnitude=2.50, return eventlist + def _query_bsi(start_time, end_time, min_magnitude=2.50, min_latitude=32.0, max_latitude=50.0, min_longitude=2.0, max_longitude=21.0, @@ -722,7 +721,7 @@ def _query_bsi(start_time, end_time, min_magnitude=2.50, return eventlist -# Adding GNS catalog reader + def _query_gns(start_time, end_time, min_magnitude=2.950, min_latitude=-47, max_latitude=-34, min_longitude=164, max_longitude=180, @@ -746,139 +745,40 @@ def _query_gns(start_time, end_time, min_magnitude=2.950, return eventlist - -def _query_gcmt(out_format='QuakeML', - request='COMPREHENSIVE', - searchshape='GLOBAL', - start_year=2020, - start_month=1, - start_day=1, - start_time='00:00:00', - end_year=2022, - end_month=1, - end_day=1, - end_time='23:59:59', - host=None, - include_magnitudes='on', - min_mag=5.95, - max_mag=None, - min_dep=None, - max_dep=None, - left_lon=None, - right_lon=None, - bot_lat=None, - top_lat=None, - req_mag_type='MW', - req_mag_agcy='GCMT', - verbose=False): - """ Return gCMT catalog from ISC online web-portal - - Args: - (follow csep.query_comcat for guidance) - - Returns: - out (csep.core.catalogs.AbstractBaseCatalog): gCMT catalog +def _query_gcmt(start_time, end_time, min_magnitude=3.50, + min_latitude=None, max_latitude=None, + min_longitude=None, max_longitude=None, + max_depth=1000, extra_gcmt_params=None): """ + Return GCMT eventlist from IRIS web service. + For details see "https://service.iris.edu/fdsnws/event/1/" + Args: + start_time (datetime.datetime): start time of catalog query + end_time (datetime.datetime): end time of catalog query + min_magnitude (float): minimum magnitude of query + min_latitude (float): minimum latitude of query + max_latitude (float): maximum latitude of query + min_longitude (float): minimum longitude of query + max_longitude (float): maximum longitude of query + max_depth (float): maximum depth of query + extra_gcmt_params (dict): additional parameters to pass to IRIS search + function - inputargs = locals().copy() - query_args = {} - - HOST_CATALOG = "http://www.isc.ac.uk/cgi-bin/web-db-run?" - TIMEOUT = 180 - - def _search_gcmt(**newargs): - """ - Performs de query at ISC API and returns event list and access date - - """ - paramstr = urlencode(newargs) - url = HOST_CATALOG + paramstr - - try: - fh = request_.urlopen(url, timeout=TIMEOUT) - data = fh.read().decode('utf8') - fh.close() - - try: - root = ElementTree.fromstring(data) - except Exception as msg: - raise Exception('Parse failed for string: %s' % data) - ns = root[0].tag.split('}')[0] + '}' - creation_time = root[0].find(ns + 'creationInfo').find( - ns + 'creationTime').text - creation_time = creation_time.replace('T', ' ') - events_quakeml = root[0].findall(ns + 'event') - events = [] - for feature in events_quakeml: - events.append(_parse_isc_event(feature, ns)) - - except ElementTree.ParseError as msg: - print('Requested URL:', url) - raise Exception('Badly-formed URL. Try downloading again in case ' - 'the hosting server has a request rate limit' - ' "%s"' % msg) - - except Exception as msg: - raise Exception( - 'Error downloading data from url %s. "%s".' % (url, msg)) - - return events, creation_time, url - - for key, value in inputargs.items(): - if value is True: - query_args[key] = 'true' - continue - if value is False: - query_args[key] = 'false' - continue - if value is None: - continue - query_args[key] = value - - del query_args['verbose'] - - start_time = time.time() - if verbose: - print('Accessing ISC API') - - events, creation_time, url = _search_gcmt(**query_args) - - if verbose: - print(f'\tAccess URL: {url}') - print( - f'\tCatalog with {len(events)} events downloaded in ' - f'{(time.time() - start_time):.2f} seconds') - - return events, creation_time - - -def _parse_isc_event(node, ns, mag_author='GCMT'): - """ - Parse event list from quakeML returned from ISC + Returns: + eventlist """ - id_ = node.get('publicID').split('=')[-1] - magnitudes = node.findall(ns + 'magnitude') - mag_gcmt = [i for i in magnitudes if - i.find(ns + 'creationInfo')[0].text == mag_author][0] - - origin_id = mag_gcmt.find(ns + 'originID').text - origins = node.findall(ns + 'origin') + extra_gcmt_params = extra_gcmt_params or {} - origin_gcmt = [i for i in origins if i.attrib['publicID'] == origin_id][0] - - lat = origin_gcmt.find(ns + 'latitude').find(ns + 'value').text - lon = origin_gcmt.find(ns + 'longitude').find(ns + 'value').text - mag = mag_gcmt.find(ns + 'mag').find(ns + 'value').text - depth = origin_gcmt.find(ns + 'depth').find(ns + 'value').text - - dtstr = origin_gcmt.find(ns + 'time').find(ns + 'value').text - date = dtstr.split('T')[0] - time_ = dtstr.split('T')[1][:-1] - dtime = datetime_to_utc_epoch( - datetime.datetime.fromisoformat(date + ' ' + time_ + '0')) - - return id_, dtime, float(lat), float(lon), float(depth) / 1000., float(mag) + eventlist = gcmt_search(minmagnitude=min_magnitude, + minlatitude=min_latitude, + maxlatitude=max_latitude, + minlongitude=min_longitude, + maxlongitude=max_longitude, + starttime=start_time.isoformat(), + endtime=end_time.isoformat(), + maxdepth=max_depth, **extra_gcmt_params) + return eventlist def _parse_datetime_to_zmap(date, time): @@ -917,6 +817,7 @@ def _parse_datetime_to_zmap(date, time): out['second'] = dt.second return out + def quadtree_ascii_loader(ascii_fname): """ Load quadtree forecasted stored as ascii text file diff --git a/tests/artifacts/gCMT/vcr_search.yaml b/tests/artifacts/gCMT/vcr_search.yaml new file mode 100644 index 00000000..833b98f8 --- /dev/null +++ b/tests/artifacts/gCMT/vcr_search.yaml @@ -0,0 +1,43 @@ +interactions: +- request: + body: null + headers: + Connection: + - close + Host: + - service.iris.edu + User-Agent: + - Python-urllib/3.11 + method: GET + uri: https://service.iris.edu/fdsnws/event/1/query?format=text&starttime=2010-02-26T00%3A00%3A00&endtime=2010-03-02T00%3A00%3A00&catalog=GCMT&maxdepth=1000&maxmagnitude=10.0&mindepth=-100&minmagnitude=7&offset=1&orderby=time-asc + response: + body: + string: '#EventID | Time | Latitude | Longitude | Depth/km | Author | Catalog + | Contributor | ContributorID | MagType | Magnitude | MagAuthor | EventLocationName + + 2844986|2010-02-27T06:35:14|-35.98|-73.15|23.2|GCMT|GCMT||C201002270634A|MW|8.8||OFF + COAST OF CENTRAL CHILE + + 2845026|2010-02-27T08:01:29|-38.09|-75.41|19.9|GCMT|GCMT||S201002270801A|MW|7.4||OFF + COAST OF CENTRAL CHILE + + ' + headers: + Access-Control-Allow-Origin: + - '*' + Access-Control-Expose-Headers: + - Access-Control-Allow-Origin,Access-Control-Allow-Credentials + Connection: + - close + Content-Length: + - '369' + Content-Type: + - text/plain + Date: + - Tue, 16 May 2023 20:08:36 GMT + content-disposition: + - inline; filename="fdsnws-event_2023-05-16T20:08:36Z.txt" + status: + code: 200 + message: '' +version: 1 diff --git a/tests/artifacts/gCMT/vcr_summary.yaml b/tests/artifacts/gCMT/vcr_summary.yaml new file mode 100644 index 00000000..a55ece43 --- /dev/null +++ b/tests/artifacts/gCMT/vcr_summary.yaml @@ -0,0 +1,43 @@ +interactions: +- request: + body: null + headers: + Connection: + - close + Host: + - service.iris.edu + User-Agent: + - Python-urllib/3.11 + method: GET + uri: https://service.iris.edu/fdsnws/event/1/query?format=text&starttime=2010-02-26T00%3A00%3A00&endtime=2010-03-02T00%3A00%3A00&catalog=GCMT&maxdepth=1000&maxmagnitude=10.0&mindepth=-100&minmagnitude=7&offset=1&orderby=time-asc + response: + body: + string: '#EventID | Time | Latitude | Longitude | Depth/km | Author | Catalog + | Contributor | ContributorID | MagType | Magnitude | MagAuthor | EventLocationName + + 2844986|2010-02-27T06:35:14|-35.98|-73.15|23.2|GCMT|GCMT||C201002270634A|MW|8.8||OFF + COAST OF CENTRAL CHILE + + 2845026|2010-02-27T08:01:29|-38.09|-75.41|19.9|GCMT|GCMT||S201002270801A|MW|7.4||OFF + COAST OF CENTRAL CHILE + + ' + headers: + Access-Control-Allow-Origin: + - '*' + Access-Control-Expose-Headers: + - Access-Control-Allow-Origin,Access-Control-Allow-Credentials + Connection: + - close + Content-Length: + - '369' + Content-Type: + - text/plain + Date: + - Tue, 16 May 2023 20:08:37 GMT + content-disposition: + - inline; filename="fdsnws-event_2023-05-16T20:08:38Z.txt" + status: + code: 200 + message: '' +version: 1 diff --git a/tests/test_gcmt.py b/tests/test_gcmt.py index 0dd3cfdd..48e46fad 100644 --- a/tests/test_gcmt.py +++ b/tests/test_gcmt.py @@ -8,37 +8,37 @@ def gcmt_dir(): - data_dir = os.path.join(root_dir, '../artifacts', 'gCMT') + data_dir = os.path.join(root_dir, 'artifacts', 'gCMT') return data_dir class TestCatalogGetter(unittest.TestCase): - def test_isc_gcmt_search(self): + def test_gcmt_search(self): tape_file = os.path.join(gcmt_dir(), 'vcr_search.yaml') with vcr.use_cassette(tape_file): # Maule, Chile eventlist = \ - _query_gcmt(start_year=2010, start_month=2, start_day=26, - end_year=2010, end_month=2, end_day=28, - min_mag=8.5)[0] - event = eventlist[0] - assert event[0] == '14340585' + _query_gcmt(start_time=datetime(2010, 2, 26), + end_time=datetime(2010, 3, 2), + min_magnitude=7)[0] + event = eventlist + assert event[0] == '2844986' def test_isc_gcmt_summary(self): tape_file = os.path.join(gcmt_dir(), 'vcr_summary.yaml') with vcr.use_cassette(tape_file): eventlist = \ - _query_gcmt(start_year=2010, start_month=2, start_day=26, - end_year=2010, end_month=2, end_day=28, - min_mag=8.5)[0] + _query_gcmt(start_time=datetime(2010, 2, 26), + end_time=datetime(2010, 3, 2), + min_magnitude=7) event = eventlist[0] - cmp = "('14340585', 1267252513600, -35.98, -73.15, 23.2, 8.78)" + cmp = "('2844986', 1267252514000, -35.98, -73.15, 23.2, 8.8)" assert str(event) == cmp - assert event[0] == '14340585' + assert event[0] == '2844986' assert datetime.fromtimestamp( - event[1] / 1000.) == datetime.fromtimestamp(1267252513.600) + event[1] / 1000.) == datetime.fromtimestamp(1267252514) assert event[2] == -35.98 assert event[3] == -73.15 assert event[4] == 23.2 - assert event[5] == 8.78 + assert event[5] == 8.8