Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update dataset search for sunpy 2.1 #89

Merged
merged 7 commits into from
Feb 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ variables:
CI_BUILD_ID: $(Build.BuildId)
# These will need to be changed for your project.
CI_BUILD_URL: "https://dev.azure.com/DKISTDC/dkist/_build/results?buildId=$(Build.BuildId)"
PIP_EXTRA_INDEX_URL: "https://pkgs.dev.azure.com/sunpy/ndcube/_packaging/ndcube/pypi/simple/"

# These resources need to be setup on the Azure Pipeline website
# as a service connection (if it has not been already).
Expand All @@ -31,14 +30,19 @@ jobs:
parameters:
coverage: codecov
envs:
- linux: py37
name: linux_37

- macos: py38
name: macos_38

- linux: py37
name: linux_37
# - windows: py38
# name: win_38

- linux: py39
name: linux_39

- linux: py38
name: linux_38
- linux: py39-devdeps

- linux: codestyle

Expand All @@ -61,5 +65,6 @@ jobs:
- wheels_universal
dependsOn:
- linux_37
- linux_38
- macos_38
# - win_38
- linux_39
2 changes: 2 additions & 0 deletions changelog/89.trivial.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Update Fido client for changes in sunpy 2.1; bump the sunpy dependency to at
least 2.1rc3.
2 changes: 1 addition & 1 deletion dkist/net/attr_walker.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import astropy.units as u
from sunpy.net.attr import AttrAnd, AttrOr, AttrWalker, DataAttr
from sunpy.net.attrs import Instrument, Level, Physobs, Time, Wavelength
from sunpy.net.attrs import Instrument, Level, Physobs, Provider, Time, Wavelength

from .attrs import *

Expand Down
10 changes: 1 addition & 9 deletions dkist/net/attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,11 @@
from sunpy.net.attr import DataAttr as _DataAttr
from sunpy.net.attr import Range as _Range
from sunpy.net.attr import SimpleAttr as _SimpleAttr
from sunpy.net.vso.attrs import Provider

__all__ = ['Dataset', 'WavelengthBand', 'Embargoed', 'Observable',
'Experiment', 'Proposal', 'TargetType', 'Recipe',
'FriedParameter', 'PolarimetricAccuracy', 'ExposureTime',
'EmbargoEndTime', 'BrowseMovie', 'BoundingBox', 'Provider']


# The attrs we are using from VSO should appear like they are defined in this
# module for documentation purposes. These should really be moved in sunpy from
# a.vso. to a.?
for attr in [Provider]:
attr.__module__ = __name__
'EmbargoEndTime', 'BrowseMovie', 'BoundingBox']


# SimpleAttrs
Expand Down
102 changes: 22 additions & 80 deletions dkist/net/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@
import json
import urllib.parse
import urllib.request
from typing import Any, Mapping, Iterable
from collections import defaultdict

import astropy.table
from sunpy.net import attr
from sunpy.net import attrs as sattrs
from sunpy.net.base_client import BaseClient, BaseQueryResponse
from sunpy.net.base_client import BaseClient, QueryResponseTable

from . import attrs as dattrs
from .attr_walker import walker

__all__ = ['DKISTQueryReponse', 'DKISTDatasetClient']


class DKISTQueryReponse(BaseQueryResponse):
class DKISTQueryResponseTable(QueryResponseTable):
"""
Results of a DKIST Dataset search.
"""
Expand All @@ -26,7 +26,7 @@ class DKISTQueryReponse(BaseQueryResponse):
_core_keys = ("Start Time", "End Time", "Instrument", "Wavelength Min", "Wavelength Max")

# Map the keys in the response to human friendly ones.
key_map = {
key_map: Mapping[str, str] = {
"asdfObjectKey": "asdf Filename",
"boundingBox": "Bounding Box",
"browseMovieObjectKey": "Movie Filename",
Expand Down Expand Up @@ -64,108 +64,50 @@ class DKISTQueryReponse(BaseQueryResponse):
"wavelengthMin": "Wavelength Min"
}

def __init__(self, table=None):
self.table = table or astropy.table.Table()
self._client = None

@classmethod
def from_results(cls, results):
res = cls()
res._append_results(results)
return res

def _append_results(self, results):
def from_results(cls, results: Iterable[Mapping[str, Any]], *, client: "DKISTDatasetClient") -> "DKISTQueryResponseTable":
"""
Append a list of results from the API.

This method translates the API names into ones similar to the query attrs.

Parameters
----------
results : `list`
A list of dicts as returned by the dataset search API.
Construct the results table from the API results.
"""
# TODO: Follow the other sunpy clients and make wavelength and len-2 Quantity
# Also map Time to Time objects etc
new_results = defaultdict(list)
for result in results:
for key, value in result.items():
new_results[self.key_map[key]].append(value)

full_table = astropy.table.Table(data=new_results)

self.table = astropy.table.vstack(full_table, self.table)

@property
def client(self):
if self._client is None:
self.client = DKISTDatasetClient()
return self._client

@client.setter
def client(self, value):
self._client = value

@property
def blocks(self):
return list(self.table.iterrows())

def __len__(self):
return len(self.table)

def __getitem__(self, item):
return type(self)(self.table[item])

def __iter__(self):
return (t for t in [self])

def __eq__(self, o):
return self.table.__eq__(o)

def build_table(self):
return self.table

def response_block_properties(self):
"""
Set of class attributes on all the response blocks.
"""
raise NotImplementedError()

def __str__(self):
"""Print out human-readable summary of records retrieved."""
if len(self) == 0:
return str(self.table)
return "\n".join(self.build_table()[self._core_keys].pformat(max_width=200,
show_dtype=False))
new_results[cls.key_map[key]].append(value)

def _repr_html_(self):
if len(self) == 0:
return self.table._repr_html_()
return self.table[self._core_keys]._repr_html_()
return cls(new_results, client=client)


class DKISTDatasetClient(BaseClient):
"""
Search DKIST datasets and retrieve metadata files describing them.
"""

_BASE_URL = os.environ.get("DKIST_DATASET_ENDPOINT", "")
_BASE_URL = os.environ.get("DKIST_DATASET_ENDPOINT", "https://dkistdcapi2.colorado.edu/datasets/v1")

def search(self, *args):
def search(self, *args) -> DKISTQueryResponseTable:
"""
Search for datasets provided by the DKIST data centre.
"""
query = attr.and_(*args)
queries = walker.create(query)

results = DKISTQueryReponse()
results = []
for url_parameters in queries:
query_string = urllib.parse.urlencode(url_parameters)

full_url = f"{self._BASE_URL}?{query_string}"
data = urllib.request.urlopen(full_url)
data = json.loads(data.read())
results._append_results(data["searchResults"])
results += data["searchResults"]

return results
res = DKISTQueryResponseTable.from_results(results, client=self)
all_cols: Iterable[str] = list(res.colnames)
first_names = [n for n in res._core_keys if n in all_cols]
extra_cols = [col for col in all_cols if col not in first_names]
all_cols = first_names + extra_cols
return res[[col for col in all_cols]]

def fetch(self, *query_results, path=None, overwrite=False, progress=True,
max_conn=5, downloader=None, wait=True, **kwargs):
Expand Down Expand Up @@ -200,7 +142,7 @@ def fetch(self, *query_results, path=None, overwrite=False, progress=True,
raise NotImplementedError("Download of asdf files is not yet implemented.")

@classmethod
def _can_handle_query(cls, *query):
def _can_handle_query(cls, *query) -> bool:
# This enables the client to register what kind of searches it can
# handle, to prevent Fido using the incorrect client.
from sunpy.net import attrs as a
Expand Down Expand Up @@ -240,7 +182,7 @@ def register_values(cls):
Known search values for DKIST data, currently manually specified.
"""
return {
sattrs.vso.Provider: [("DKIST", "Data provided by the DKIST Data Center")],
sattrs.Provider: [("DKIST", "Data provided by the DKIST Data Center")],
# instrumentNames
sattrs.Instrument: [("VBI", "Visible Broadband Imager"),
("VISP", "Visible Spectro-Polarimeter"),
Expand Down
4 changes: 2 additions & 2 deletions dkist/net/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def api_param_names():
a.Instrument: ('instrumentNames',),
a.Wavelength: ('wavelengthMinMin', 'wavelengthMaxMax'),
a.Physobs: ('hasAllStokes',),
a.Provider: tuple(),
da.Dataset: ('datasetIds',),
da.WavelengthBand: ('filterWavelengths',),
da.Observable: ('observables',),
Expand All @@ -40,5 +41,4 @@ def api_param_names():
da.PolarimetricAccuracy: ('qualityAveragePolarimetricAccuracyMin', 'qualityAveragePolarimetricAccuracyMax'),
da.ExposureTime: ('exposureTimeMin', 'exposureTimeMax'),
da.EmbargoEndTime: ('embargoEndDateMin', 'embargoEndDateMax'),
da.Provider: tuple(),
}
}
2 changes: 2 additions & 0 deletions dkist/net/tests/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def query_and(draw, stattrs=st.lists(st.sampled_from(_supported_attr_types()),
"""
attr_types = draw(stattrs)
query_attrs = list(map(draw, map(st.from_type, attr_types)))
assume(not(len(query_attrs) == 1 and isinstance(query_attrs[0], a.Time)))
return attr.and_(*query_attrs)


Expand All @@ -96,6 +97,7 @@ def query_or(draw, stattrs=st.lists(st.sampled_from(_supported_attr_types()),
"""
attr_types = draw(stattrs)
query_attrs = list(map(draw, map(st.from_type, attr_types)))
assume(not(any(isinstance(q, a.Time) for q in query_attrs)))
return attr.or_(*query_attrs)


Expand Down
48 changes: 22 additions & 26 deletions dkist/net/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@

from sunpy.net import Fido, attr
from sunpy.net import attrs as a
from sunpy.net.base_client import QueryResponseRow
from sunpy.tests.helpers import no_vso

from dkist.net.client import DKISTDatasetClient, DKISTQueryReponse
from dkist.net.client import DKISTDatasetClient, DKISTQueryResponseTable
from dkist.net.tests import strategies as dst # noqa


Expand All @@ -17,16 +18,16 @@ def client():
return DKISTDatasetClient()


@pytest.mark.remote_data
@pytest.mark.skip
@pytest.mark.remote_data
def test_search(client):
# TODO: Write an online test to verify real behaviour once there is stable data
res = client.search(a.Time("2019/01/01", "2021/01/01"))
print(res)


@pytest.fixture
def empty_query_response():
return DKISTQueryReponse()
return DKISTQueryResponseTable()


@pytest.fixture
Expand Down Expand Up @@ -88,24 +89,16 @@ def mocked_client(mocker, client, example_api_response):
return client


def test_append_query_response(empty_query_response, example_api_response):
qr = empty_query_response
qr._append_results(example_api_response["searchResults"])
def test_query_response_from_results(empty_query_response, example_api_response):
dclient = DKISTDatasetClient()
qr = DKISTQueryResponseTable.from_results(example_api_response["searchResults"], client=dclient)

assert len(qr) == 1
assert isinstance(qr.client, DKISTDatasetClient)
dclient = DKISTDatasetClient()
qr.client = dclient
assert qr.client is dclient
assert qr.build_table() is qr.table
assert len(qr.table) == len(qr)
assert isinstance(qr[0], DKISTQueryReponse)
assert not set(qr.table.columns).difference(DKISTQueryReponse.key_map.values())
assert set(qr.table.columns).isdisjoint(DKISTQueryReponse.key_map.keys())
assert all(x in str(qr) for x in DKISTQueryReponse._core_keys)
assert all(x in qr._repr_html_() for x in DKISTQueryReponse._core_keys)
assert isinstance(qr.blocks, list)
assert qr.blocks == list(qr.table.iterrows())
assert isinstance(qr[0], QueryResponseRow)
assert not set(qr.colnames).difference(DKISTQueryResponseTable.key_map.values())
assert set(qr.colnames).isdisjoint(DKISTQueryResponseTable.key_map.keys())


def test_length_0_qr(empty_query_response):
Expand Down Expand Up @@ -133,23 +126,26 @@ def test_apply_or_and(s):
assert isinstance(s, (attr.AttrOr, attr.DataAttr, attr.AttrAnd))


@settings(suppress_health_check=[HealthCheck.too_slow])
@settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.function_scoped_fixture])
@given(dst.query_and())
def test_search_query_and(mocked_client, query):
res = mocked_client.search(query)
assert isinstance(res, DKISTQueryReponse)
assert isinstance(res, DKISTQueryResponseTable)
assert len(res) == 1


@settings(suppress_health_check=[HealthCheck.too_slow])
@settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.function_scoped_fixture])
@given(dst.query_or_composite())
def test_search_query_or(mocked_client, query):
res = mocked_client.search(query)
assert isinstance(res, DKISTQueryReponse)
assert len(res) == 1
assert isinstance(res, DKISTQueryResponseTable)
if isinstance(query, attr.AttrOr):
assert len(res) == len(query.attrs)
else:
assert len(res) == 1


@settings(suppress_health_check=[HealthCheck.too_slow])
@settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.function_scoped_fixture])
@given(dst.query_and())
def test_can_handle_query(client, query):
# Can handle query never gets passed an AttrOr
Expand All @@ -174,12 +170,12 @@ def test_cant_handle_query(client, query):


@no_vso
@settings(suppress_health_check=[HealthCheck.too_slow])
@settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.function_scoped_fixture])
@given(st.one_of(dst.query_and(), dst.query_or(), dst.query_or_composite()))
def test_fido_valid(mocker, mocked_client, query):
# Test that Fido is passing through our queries to our client
mocked_search = mocker.patch('dkist.net.client.DKISTDatasetClient.search')
mocked_search.return_value = DKISTQueryReponse()
mocked_search.return_value = DKISTQueryResponseTable()

Fido.search(query)
assert mocked_search.called
Expand Down
Loading