Skip to content

Commit

Permalink
Merge pull request #1614 from gbrammer/master
Browse files Browse the repository at this point in the history
Allow retrieval from a previous ESO archive request
  • Loading branch information
bsipocz authored Nov 24, 2021
2 parents de0a326 + 0ecad92 commit c8d678b
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 30 deletions.
5 changes: 5 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ New Tools and Services
Service fixes and enhancements
------------------------------

eso
^^^

- Add ``request_id`` option to ``retrieve_data`` to download data from an
  earlier archive request instead of submitting a new one [#1614]

sdss
^^^^

Expand Down
99 changes: 69 additions & 30 deletions astroquery/eso/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,8 @@ def _download_file(self, url, local_filepath, **kwargs):
return resp

def retrieve_data(self, datasets, continuation=False, destination=None,
with_calib='none', request_all_objects=False, unzip=True):
with_calib='none', request_all_objects=False,
unzip=True, request_id=None):
"""
Retrieve a list of datasets from the ESO archive.
Expand Down Expand Up @@ -657,6 +658,12 @@ def retrieve_data(self, datasets, continuation=False, destination=None,
unzip : bool
Unzip compressed files from the archive after download. `True` by
default.
request_id : str, int
Retrieve from an existing request rather than submitting a new one.
The identifier is the last component of the URL in the email that the
archive sent for the earlier request, as in:
https://dataportal.eso.org/rh/requests/[USERNAME]/[request_id]
Returns
-------
Expand Down Expand Up @@ -694,9 +701,14 @@ def retrieve_data(self, datasets, continuation=False, destination=None,
datasets, continuation=continuation, destination=destination)

# Second: Check that the datasets to download are in the archive
log.info("Checking availability of datasets to download...")
valid_datasets = [self.verify_data_exists(ds)
if request_id is None:
log.info("Checking availability of datasets to download...")
valid_datasets = [self.verify_data_exists(ds)
for ds in datasets_to_download]
else:
# Assume all valid if a request_id was provided
valid_datasets = [(ds, True) for ds in datasets_to_download]

if not all(valid_datasets):
invalid_datasets = [ds for ds, v in zip(datasets_to_download,
valid_datasets) if not v]
Expand All @@ -710,33 +722,52 @@ def retrieve_data(self, datasets, continuation=False, destination=None,
self.login()
url = "http://archive.eso.org/cms/eso-data/eso-data-direct-retrieval.html"
with suspend_cache(self): # Never cache staging operations
log.info("Contacting retrieval server...")
retrieve_data_form = self._request("GET", url, cache=False)
retrieve_data_form.raise_for_status()
log.info("Staging request...")
inputs = {"list_of_datasets": "\n".join(datasets_to_download)}
data_confirmation_form = self._activate_form(
retrieve_data_form, form_index=-1, inputs=inputs,
cache=False)

data_confirmation_form.raise_for_status()

root = BeautifulSoup(data_confirmation_form.content,
'html5lib')
login_button = root.select('input[value=LOGIN]')
if login_button:
raise LoginError("Not logged in. "
"You must be logged in to download data.")
inputs = {}
if with_calib != 'none':
inputs['requestCommand'] = calib_options[with_calib]

# TODO: There may be another screen for Not Authorized; that
# should be included too
# form name is "retrieve"; no id
data_download_form = self._activate_form(
data_confirmation_form, form_index=-1, inputs=inputs,
cache=False)
if request_id is None:
log.info("Contacting retrieval server...")
retrieve_data_form = self._request("GET", url,
cache=False)
retrieve_data_form.raise_for_status()
log.info("Staging request...")
inputs = {"list_of_datasets": "\n".join(datasets_to_download)}
data_confirmation_form = self._activate_form(
retrieve_data_form, form_index=-1, inputs=inputs,
cache=False)

data_confirmation_form.raise_for_status()

root = BeautifulSoup(data_confirmation_form.content,
'html5lib')
login_button = root.select('input[value=LOGIN]')
if login_button:
raise LoginError("Not logged in. "
"You must be logged in to download data.")
inputs = {}
if with_calib != 'none':
inputs['requestCommand'] = calib_options[with_calib]

# TODO: There may be another screen for Not Authorized;
# that should be included too
# form name is "retrieve"; no id
data_download_form = self._activate_form(
data_confirmation_form, form_index=-1, inputs=inputs,
cache=False)
else:
# Build URL by hand
request_url = 'https://dataportal.eso.org/rh/requests/'
request_url += f'{self.USERNAME}/{request_id}'
data_download_form = self._request("GET", request_url,
cache=False)

_content = data_download_form.content.decode('utf-8')
if ('Request Handler - Error' in _content):
# Likely a problem with the request_url
msg = (f"The form at {request_url} returned an error."
" See your recent requests at "
"https://dataportal.eso.org/rh/requests/"
f"{self.USERNAME}/recentRequests")

raise RemoteServiceError(msg)

log.info("Staging form is at {0}"
.format(data_download_form.url))
root = BeautifulSoup(data_download_form.content, 'html5lib')
Expand Down Expand Up @@ -809,6 +840,14 @@ def retrieve_data(self, datasets, continuation=False, destination=None,
log.debug("Files:\n{}".format('\n'.join(fileLinks)))
for i, fileLink in enumerate(fileLinks, 1):
fileId = fileLink.rsplit('/', maxsplit=1)[1]

if request_id is not None:
# Since we fetched the script directly without sending
# a new request, check here that the file in the list
# is among those requested in the input list
if fileId.split('.fits')[0] not in datasets_to_download:
continue

log.info("Downloading file {}/{}: {}..."
.format(i, nfiles, fileId))
filename = self._request("GET", fileLink, save=True,
Expand Down
18 changes: 18 additions & 0 deletions docs/eso/eso.rst
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,24 @@ a ``location`` keyword in the call to :meth:`~astroquery.eso.EsoClass.retrieve_d
In all cases, if a requested dataset is already found,
it is not downloaded again from the archive.

By default, calling ``eso.retrieve_data`` submits a new archive request
through the web form to stage and download the requested ``datasets``. If you
would like to download datasets from an existing request, either submitted
through the functions here or externally, call ``retrieve_data`` with the
``request_id`` option:

.. code-block:: python

    >>> data_files = eso.retrieve_data(table['DP.ID'][:2], request_id=999999)

The ``request_id`` can be found in the automatic email sent by the archive after
staging the initial request, i.e.,
https://dataportal.eso.org/rh/requests/[USERNAME]/[request_id].
A summary of your available requests is shown at
https://dataportal.eso.org/rh/requests/[USERNAME]/recentRequests.

Note: The function checks that the retrieval URL built from ``request_id`` is
valid, and it downloads only those files of the request that are also listed
in the user-specified ``datasets``; any other file in the request is skipped.
There is currently no check in the other direction, i.e., that every entry of
``datasets`` is actually part of the request identified by ``request_id``.

Reference/API
=============
Expand Down

0 comments on commit c8d678b

Please sign in to comment.