diff --git a/CHANGES.rst b/CHANGES.rst index d844d68292..4204438dee 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,11 @@ New Tools and Services Service fixes and enhancements ------------------------------ +eso +^^^ + +- Add option to retrieve_data from an earlier archive query [#1614] + sdss ^^^^ diff --git a/astroquery/eso/core.py b/astroquery/eso/core.py index 83005624cc..4612757c7e 100644 --- a/astroquery/eso/core.py +++ b/astroquery/eso/core.py @@ -629,7 +629,8 @@ def _download_file(self, url, local_filepath, **kwargs): return resp def retrieve_data(self, datasets, continuation=False, destination=None, - with_calib='none', request_all_objects=False, unzip=True): + with_calib='none', request_all_objects=False, + unzip=True, request_id=None): """ Retrieve a list of datasets form the ESO archive. @@ -657,6 +658,12 @@ def retrieve_data(self, datasets, continuation=False, destination=None, unzip : bool Unzip compressed files from the archive after download. `True` by default. + request_id : str, int + Retrieve from an existing request number rather than sending a new + query, with the identifier from the URL in the email sent from + the archive from the earlier request as in: + + https://dataportal.eso.org/rh/requests/[USERNAME]/[request_id] Returns ------- @@ -694,9 +701,14 @@ def retrieve_data(self, datasets, continuation=False, destination=None, datasets, continuation=continuation, destination=destination) # Second: Check that the datasets to download are in the archive - log.info("Checking availability of datasets to download...") - valid_datasets = [self.verify_data_exists(ds) + if request_id is None: + log.info("Checking availability of datasets to download...") + valid_datasets = [self.verify_data_exists(ds) for ds in datasets_to_download] + else: + # Assume all valid if a request_id was provided + valid_datasets = [(ds, True) for ds in datasets_to_download] + if not all(valid_datasets): invalid_datasets = [ds for ds, v in zip(datasets_to_download, 
valid_datasets) if not v] @@ -710,33 +722,52 @@ def retrieve_data(self, datasets, continuation=False, destination=None, self.login() url = "http://archive.eso.org/cms/eso-data/eso-data-direct-retrieval.html" with suspend_cache(self): # Never cache staging operations - log.info("Contacting retrieval server...") - retrieve_data_form = self._request("GET", url, cache=False) - retrieve_data_form.raise_for_status() - log.info("Staging request...") - inputs = {"list_of_datasets": "\n".join(datasets_to_download)} - data_confirmation_form = self._activate_form( - retrieve_data_form, form_index=-1, inputs=inputs, - cache=False) - - data_confirmation_form.raise_for_status() - - root = BeautifulSoup(data_confirmation_form.content, - 'html5lib') - login_button = root.select('input[value=LOGIN]') - if login_button: - raise LoginError("Not logged in. " - "You must be logged in to download data.") - inputs = {} - if with_calib != 'none': - inputs['requestCommand'] = calib_options[with_calib] - - # TODO: There may be another screen for Not Authorized; that - # should be included too - # form name is "retrieve"; no id - data_download_form = self._activate_form( - data_confirmation_form, form_index=-1, inputs=inputs, - cache=False) + if request_id is None: + log.info("Contacting retrieval server...") + retrieve_data_form = self._request("GET", url, + cache=False) + retrieve_data_form.raise_for_status() + log.info("Staging request...") + inputs = {"list_of_datasets": "\n".join(datasets_to_download)} + data_confirmation_form = self._activate_form( + retrieve_data_form, form_index=-1, inputs=inputs, + cache=False) + + data_confirmation_form.raise_for_status() + + root = BeautifulSoup(data_confirmation_form.content, + 'html5lib') + login_button = root.select('input[value=LOGIN]') + if login_button: + raise LoginError("Not logged in. 
" + "You must be logged in to download data.") + inputs = {} + if with_calib != 'none': + inputs['requestCommand'] = calib_options[with_calib] + + # TODO: There may be another screen for Not Authorized; + # that should be included too + # form name is "retrieve"; no id + data_download_form = self._activate_form( + data_confirmation_form, form_index=-1, inputs=inputs, + cache=False) + else: + # Build URL by hand + request_url = 'https://dataportal.eso.org/rh/requests/' + request_url += f'{self.USERNAME}/{request_id}' + data_download_form = self._request("GET", request_url, + cache=False) + + _content = data_download_form.content.decode('utf-8') + if ('Request Handler - Error' in _content): + # Likely a problem with the request_url + msg = (f"The form at {request_url} returned an error." + " See your recent requests at " + "https://dataportal.eso.org/rh/requests/" + f"{self.USERNAME}/recentRequests") + + raise RemoteServiceError(msg) + log.info("Staging form is at {0}" .format(data_download_form.url)) root = BeautifulSoup(data_download_form.content, 'html5lib') @@ -809,6 +840,14 @@ def retrieve_data(self, datasets, continuation=False, destination=None, log.debug("Files:\n{}".format('\n'.join(fileLinks))) for i, fileLink in enumerate(fileLinks, 1): fileId = fileLink.rsplit('/', maxsplit=1)[1] + + if request_id is not None: + # Since we fetched the script directly without sending + # a new request, check here that the file in the list + # is among those requested in the input list + if fileId.split('.fits')[0] not in datasets_to_download: + continue + log.info("Downloading file {}/{}: {}..." 
.format(i, nfiles, fileId)) filename = self._request("GET", fileLink, save=True, diff --git a/docs/eso/eso.rst b/docs/eso/eso.rst index 64eaf9c284..ca819d91a2 100644 --- a/docs/eso/eso.rst +++ b/docs/eso/eso.rst @@ -348,6 +348,24 @@ a ``location`` keyword in the call to :meth:`~astroquery.eso.EsoClass.retrieve_d In all cases, if a requested dataset is already found, it is not downloaded again from the archive. +By default, calling ``eso.retrieve_data`` submits a new archive request +through the web form to stage and download the requested ``datasets``. If you +would like to download datasets from an existing request, either submitted +through the functions here or externally, call ``retrieve_data`` with the +``request_id`` option: + +.. code-block:: python + + >>> data_files = eso.retrieve_data(table['DP.ID'][:2], request_id=999999) + +The ``request_id`` can be found in the automatic email sent by the archive after +staging the initial request, as the last part of the URL https://dataportal.eso.org/rh/requests/[USERNAME]/[request_id]. A summary of your available requests is shown at https://dataportal.eso.org/rh/requests/[USERNAME]/recentRequests. + +Note: The function checks that the retrieval URL built from ``request_id`` +is valid and downloads only the files of that request that match the +user-specified ``datasets``, but there is currently no reverse check that +every one of the specified ``datasets`` is actually included in the request +``request_id``. Reference/API =============