Skip to content

Commit

Permalink
Merge pull request astropy#3193 from snbianco/ASB-30325-mast-uris
Browse files Browse the repository at this point in the history
Accept MAST URIs as input to get_cloud_uris()
  • Loading branch information
bsipocz authored Feb 20, 2025
2 parents d619223 + 311197b commit 8192801
Show file tree
Hide file tree
Showing 10 changed files with 234 additions and 48 deletions.
14 changes: 14 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,16 @@
New Tools and Services
----------------------


API changes
-----------

mast
^^^^

- Handle a MAST URI string as input for ``Observations.get_cloud_uri`` and a list of MAST URIs as input for
``Observations.get_cloud_uris``. [#3193]

simbad
^^^^^^

Expand Down Expand Up @@ -35,6 +42,13 @@ ipac.nexsci.nasa_exoplanet_archive

- Fixed InvalidTableError for DI_STARS_EXEP and TD tables. [#3189]

mast
^^^^

- Bugfix where users are unnecessarily warned about a query limit while fetching products in ``MastMissions.get_product_list``. [#3193]

- Bugfix where ``Observations.get_cloud_uri`` and ``Observations.get_cloud_uris`` fail if the MAST relative path is not found. [#3193]

simbad
^^^^^^

Expand Down
21 changes: 10 additions & 11 deletions astroquery/mast/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ def __init__(self, provider="AWS", profile=None, verbose=False):
import boto3
import botocore

self.supported_missions = ["mast:hst/product", "mast:tess/product", "mast:kepler", "mast:galex", "mast:ps1"]
self.supported_missions = ["mast:hst/product", "mast:tess/product", "mast:kepler", "mast:galex", "mast:ps1",
"mast:jwst/product"]

self.boto3 = boto3
self.botocore = botocore
Expand All @@ -77,11 +78,7 @@ def is_supported(self, data_product):
response : bool
Is the product from a supported mission.
"""

for mission in self.supported_missions:
if data_product['dataURI'].lower().startswith(mission):
return True
return False
return any(data_product['dataURI'].lower().startswith(mission) for mission in self.supported_missions)

def get_cloud_uri(self, data_product, include_bucket=True, full_url=False):
"""
Expand All @@ -92,7 +89,7 @@ def get_cloud_uri(self, data_product, include_bucket=True, full_url=False):
Parameters
----------
data_product : `~astropy.table.Row`
data_product : `~astropy.table.Row`, str
Product to be converted into cloud data uri.
include_bucket : bool
Default True. When false returns the path of the file relative to the
Expand All @@ -108,6 +105,8 @@ def get_cloud_uri(self, data_product, include_bucket=True, full_url=False):
Cloud URI generated from the data product. If the product cannot be
found in the cloud, None is returned.
"""
# If data_product is a string, convert to a list
data_product = [data_product] if isinstance(data_product, str) else data_product

uri_list = self.get_cloud_uri_list(data_product, include_bucket=include_bucket, full_url=full_url)

Expand All @@ -124,8 +123,8 @@ def get_cloud_uri_list(self, data_products, include_bucket=True, full_url=False)
Parameters
----------
data_products : `~astropy.table.Table`
Table containing products to be converted into cloud data uris.
data_products : `~astropy.table.Table`, list
Table containing products or list of MAST uris to be converted into cloud data uris.
include_bucket : bool
Default True. When false returns the path of the file relative to the
top level cloud storage location.
Expand All @@ -141,8 +140,8 @@ def get_cloud_uri_list(self, data_products, include_bucket=True, full_url=False)
if data_products includes products not found in the cloud.
"""
s3_client = self.boto3.client('s3', config=self.config)

paths = utils.mast_relative_path(data_products["dataURI"])
data_uris = data_products if isinstance(data_products, list) else data_products['dataURI']
paths = utils.mast_relative_path(data_uris)
if isinstance(paths, str): # Handle the case where only one product was requested
paths = [paths]

Expand Down
9 changes: 5 additions & 4 deletions astroquery/mast/missions.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,15 @@ def _parse_result(self, response, *, verbose=False): # Used by the async_to_syn

if self.service == self._search:
results = self._service_api_connection._parse_result(response, verbose, data_key='results')

# Warn if maximum results are returned
if len(results) >= self.limit:
warnings.warn("Maximum results returned, may not include all sources within radius.",
MaxResultsWarning)
elif self.service == self._list_products:
# Results from post_list_products endpoint need to be handled differently
results = Table(response.json()['products'])

if len(results) >= self.limit:
warnings.warn("Maximum results returned, may not include all sources within radius.",
MaxResultsWarning)

return results

def _validate_criteria(self, **criteria):
Expand Down
23 changes: 15 additions & 8 deletions astroquery/mast/observations.py
Original file line number Diff line number Diff line change
Expand Up @@ -854,9 +854,9 @@ def get_cloud_uris(self, data_products=None, *, include_bucket=True, full_url=Fa
Parameters
----------
data_products : `~astropy.table.Table`
Table containing products to be converted into cloud data uris. If provided, this will supercede
page_size, page, or any keyword arguments passed in as criteria.
data_products : `~astropy.table.Table`, list
Table containing products or list of MAST uris to be converted into cloud data uris.
If provided, this will supersede page_size, page, or any keyword arguments passed in as criteria.
include_bucket : bool
Default True. When False, returns the path of the file relative to the
top level cloud storage location.
Expand Down Expand Up @@ -920,16 +920,23 @@ def get_cloud_uris(self, data_products=None, *, include_bucket=True, full_url=Fa
# Return list of associated data products
data_products = self.get_product_list(obs)

# Filter product list
data_products = self.filter_products(data_products, mrp_only=mrp_only, extension=extension, **filter_products)
if isinstance(data_products, Table):
# Filter product list
data_products = self.filter_products(data_products, mrp_only=mrp_only, extension=extension,
**filter_products)
else: # data_products is a list of URIs
# Warn if trying to supply filters
if filter_products or extension or mrp_only:
warnings.warn('Filtering is not supported when providing a list of MAST URIs. '
'To apply filters, please provide query criteria or a table of data products '
'as returned by `Observations.get_product_list`', InputWarning)

if not len(data_products):
warnings.warn("No matching products to fetch associated cloud URIs.", NoResultsWarning)
warnings.warn('No matching products to fetch associated cloud URIs.', NoResultsWarning)
return

# Remove duplicate products
data_products = utils.remove_duplicate_products(data_products, 'dataURI')

return self._cloud_connection.get_cloud_uri_list(data_products, include_bucket, full_url)

def get_cloud_uri(self, data_product, *, include_bucket=True, full_url=False):
Expand All @@ -941,7 +948,7 @@ def get_cloud_uri(self, data_product, *, include_bucket=True, full_url=False):
Parameters
----------
data_product : `~astropy.table.Row`
data_product : `~astropy.table.Row`, str
Product to be converted into cloud data uri.
include_bucket : bool
Default True. When false returns the path of the file relative to the
Expand Down
12 changes: 12 additions & 0 deletions astroquery/mast/tests/data/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,15 @@ To generate `~astroquery.mast.tests.data.mission_products.json`, use the followi
>>> resp = utils._simple_request('https://mast.stsci.edu/search/hst/api/v0.1/list_products', {'dataset_ids': 'Z14Z0104T'})
>>> with open('mission_products.json', 'w') as file:
... json.dump(resp.json(), file, indent=4) # doctest: +SKIP

To generate `~astroquery.mast.tests.data.mast_relative_path.json`, use the following:

.. doctest-remote-data::

>>> import json
>>> from astroquery.mast import utils
...
>>> resp = utils._simple_request('https://mast.stsci.edu/api/v0.1/path_lookup/',
... {'uri': ['mast:HST/product/u9o40504m_c3m.fits', 'mast:HST/product/does_not_exist.fits']})
>>> with open('mast_relative_path.json', 'w') as file:
... json.dump(resp.json(), file, indent=4) # doctest: +SKIP
10 changes: 10 additions & 0 deletions astroquery/mast/tests/data/mast_relative_path.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"mast:HST/product/u9o40504m_c3m.fits": {
"status_code": 200,
"path": "/hst/public/u9o4/u9o40504m/u9o40504m_c3m.fits"
},
"mast:HST/product/does_not_exist.fits": {
"status_code": 404,
"path": null
}
}
96 changes: 95 additions & 1 deletion astroquery/mast/tests/test_mast.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import re
from shutil import copyfile
from unittest.mock import patch

import pytest

Expand All @@ -16,7 +17,8 @@

from astroquery.mast.services import _json_to_table
from astroquery.utils.mocks import MockResponse
from astroquery.exceptions import InvalidQueryError, InputWarning, MaxResultsWarning
from astroquery.exceptions import (InvalidQueryError, InputWarning, MaxResultsWarning, NoResultsWarning,
RemoteServiceError)

from astroquery import mast

Expand Down Expand Up @@ -48,6 +50,7 @@
'Mast.HscMatches.Db.v3': 'matchid.json',
'Mast.HscMatches.Db.v2': 'matchid.json',
'Mast.HscSpectra.Db.All': 'spectra.json',
'mast_relative_path': 'mast_relative_path.json',
'panstarrs': 'panstarrs.json',
'panstarrs_columns': 'panstarrs_columns.json',
'tess_cutout': 'astrocut_107.27_-70.0_5x5.zip',
Expand Down Expand Up @@ -142,6 +145,8 @@ def request_mockreturn(url, params={}):
filename = data_path(DATA_FILES["Mast.Name.Lookup"])
elif 'panstarrs' in url:
filename = data_path(DATA_FILES['panstarrs_columns'])
elif 'path_lookup' in url:
filename = data_path(DATA_FILES['mast_relative_path'])
with open(filename, 'rb') as infile:
content = infile.read()
return MockResponse(content)
Expand Down Expand Up @@ -678,6 +683,95 @@ def test_observations_download_file(patch_post, tmpdir):
assert result == ('COMPLETE', None, None)


def test_observations_get_cloud_uri(patch_post):
    """Check that ``Observations.get_cloud_uri`` accepts a product row or a MAST URI string."""
    # Skip (rather than error) when boto3 is not installed. ``patch`` imports its target
    # when the patch is entered, so it must only be applied after this check has run.
    pytest.importorskip("boto3")

    mast_uri = 'mast:HST/product/u9o40504m_c3m.fits'
    expected = 's3://stpubdata/hst/public/u9o4/u9o40504m/u9o40504m_c3m.fits'

    with patch('boto3.client'):
        # Error without cloud connection
        with pytest.raises(RemoteServiceError):
            mast.Observations.get_cloud_uri(mast_uri)

        # Enable access to public AWS S3 bucket
        mast.Observations.enable_cloud_dataset()
        try:
            # Row input
            product = Table()
            product['dataURI'] = [mast_uri]
            uri = mast.Observations.get_cloud_uri(product[0])
            assert isinstance(uri, str)
            assert uri == expected

            # String input
            uri = mast.Observations.get_cloud_uri(mast_uri)
            assert uri == expected
        finally:
            # Always restore the disabled state so a failure here cannot affect other tests
            mast.Observations.disable_cloud_dataset()


def test_observations_get_cloud_uris(patch_post):
    """Check ``Observations.get_cloud_uris`` with a product table and with a list of MAST URIs."""
    # Skip (rather than error) when boto3 is not installed. ``patch`` imports its target
    # when the patch is entered, so it must only be applied after this check has run.
    pytest.importorskip("boto3")

    mast_uri = 'mast:HST/product/u9o40504m_c3m.fits'
    expected = 's3://stpubdata/hst/public/u9o4/u9o40504m/u9o40504m_c3m.fits'

    with patch('boto3.client'):
        # Error without cloud connection
        with pytest.raises(RemoteServiceError):
            mast.Observations.get_cloud_uris([mast_uri])

        # Enable access to public AWS S3 bucket
        mast.Observations.enable_cloud_dataset()
        try:
            # Get the cloud URIs
            # Table input (pass the table itself so the Table code path is exercised)
            product = Table()
            product['dataURI'] = [mast_uri]
            uris = mast.Observations.get_cloud_uris(product)
            assert isinstance(uris, list)
            assert len(uris) == 1
            assert uris[0] == expected

            # List input
            uris = mast.Observations.get_cloud_uris([mast_uri])
            assert isinstance(uris, list)
            assert len(uris) == 1
            assert uris[0] == expected

            # Warn if attempting to filter with list input
            with pytest.warns(InputWarning, match='Filtering is not supported'):
                mast.Observations.get_cloud_uris([mast_uri],
                                                 extension='png')

            # Warn if not found
            with pytest.warns(NoResultsWarning, match='Failed to retrieve MAST relative path'):
                mast.Observations.get_cloud_uris(['mast:HST/product/does_not_exist.fits'])
        finally:
            # Restore the disabled state so the "no cloud connection" checks in other
            # tests do not depend on execution order
            mast.Observations.disable_cloud_dataset()


def test_observations_get_cloud_uris_query(patch_post):
    """Check ``Observations.get_cloud_uris`` when products are located through query criteria."""
    # Skip (rather than error) when boto3 is not installed. ``patch`` imports its target
    # when the patch is entered, so it must only be applied after this check has run.
    pytest.importorskip("boto3")

    with patch('boto3.client'):
        # Enable access to public AWS S3 bucket
        mast.Observations.enable_cloud_dataset()
        try:
            # Get URIs with the streamlined function
            uris = mast.Observations.get_cloud_uris(target_name=234295610,
                                                    filter_products={'productSubGroupDescription': 'C3M'})
            assert isinstance(uris, list)

            # Check that InvalidQueryError is raised if neither data_products nor **criteria are defined
            with pytest.raises(InvalidQueryError):
                mast.Observations.get_cloud_uris(filter_products={'productSubGroupDescription': 'C3M'})

            # Warn if no data products match filters
            with pytest.warns(NoResultsWarning, match='No matching products'):
                mast.Observations.get_cloud_uris(target_name=234295610,
                                                 filter_products={'productSubGroupDescription': 'LC'})
        finally:
            # Do not leave cloud access enabled for the tests that follow
            mast.Observations.disable_cloud_dataset()


######################
# CatalogClass tests #
######################
Expand Down
Loading

0 comments on commit 8192801

Please sign in to comment.