Skip to content

Commit

Permalink
Merge pull request #3052 from cds-astro/fix-add-votable-fields-fluxes
Browse files Browse the repository at this point in the history
[SIMBAD] Add filter names as possible votable fields
  • Loading branch information
bsipocz authored Jun 27, 2024
2 parents 091e6b3 + 9670d00 commit 1be9420
Show file tree
Hide file tree
Showing 6 changed files with 277 additions and 114 deletions.
68 changes: 47 additions & 21 deletions astroquery/simbad/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,13 +217,14 @@ def list_wildcards():
def list_votable_fields(self):
"""List all options to add columns to SIMBAD's output.
They are of three types:
They are of four types:
- "column of basic": a column of the basic table. There fields can also be explored with
`~astroquery.simbad.SimbadClass.list_columns`.
- "table": a table other than basic that has a declared direct join
- "bundle of basic columns": a pre-selected bundle of columns of basic. Ex: "parallax" will add all
columns relevant to parallax
- "filter name": an optical filter name
Examples
--------
Expand Down Expand Up @@ -266,8 +267,11 @@ def list_votable_fields(self):
"description": [value["description"] for _, value in bundle_entries.items()],
"type": ["bundle of basic columns"] * len(bundle_entries)},
dtype=["object", "object", "object"])
# vstack the three types of options
return vstack([tables, basic_columns, bundles], metadata_conflicts="silent")
# get the filter names
filters = self.query_tap("SELECT filtername AS name, description FROM filter")
filters["type"] = Column(["filter name"] * len(filters), dtype="object")
# vstack the four types of options
return vstack([tables, basic_columns, bundles, filters], metadata_conflicts="silent")

def _get_bundle_columns(self, bundle_name):
"""Get the list of columns in the preselected bundles.
Expand Down Expand Up @@ -324,11 +328,17 @@ def _add_table_to_output(self, table):
"query to be written and called with 'SimbadClass.query_tap'.")

columns = list(self.list_columns(table)["column_name"])
columns = [column.casefold() for column in columns if column not in {"oidref", "oidbibref"}]

# the alias is mandatory to be able to distinguish between duplicates like
# mesDistance.bibcode and mesDiameter.bibcode.
alias = [f'"{table}.{column}"' if not column.startswith(table) else None for column in columns]
# allfluxes is the only case-dependent table
if table == "allfluxes":
columns = [column for column in columns if column not in {"oidref", "oidbibref"}]
alias = [column.replace("_", "") if "_" in column else None
for column in columns]
else:
columns = [column.casefold() for column in columns if column not in {"oidref", "oidbibref"}]
# the alias is mandatory to be able to distinguish between duplicates like
# mesDistance.bibcode and mesDiameter.bibcode.
alias = [f"{table}.{column}" if not column.startswith(table) else None for column in columns]

# modify the attributes here
self.columns_in_output += [_Column(table, column, alias)
Expand Down Expand Up @@ -366,27 +376,43 @@ def add_votable_fields(self, *args):
['basic.main_id', 'basic.ra', 'basic.dec', 'basic.coo_err_maj', 'basic.coo_err_min', ...
"""

# the legacy way of adding fluxes is the only case-dependant option
# the legacy way of adding fluxes
args = list(args)
fluxes_to_add = []
for arg in args:
if arg.startswith("flux_"):
raise ValueError("The votable fields 'flux_***(filtername)' are removed and replaced "
"by 'flux' that will add all information for every filters. "
"See section on filters in "
"https://astroquery.readthedocs.io/en/latest/simbad/simbad_evolution.html"
" to see the new ways to interact with SIMBAD's fluxes.")
if re.match(r"^flux.*\(.+\)$", arg):
warnings.warn("The notation 'flux(X)' is deprecated since 0.4.8. "
warnings.warn("The notation 'flux(U)' is deprecated since 0.4.8 in favor of 'U'. "
"See section on filters in "
"https://astroquery.readthedocs.io/en/latest/simbad/simbad_evolution.html "
"to see how it can be replaced.", DeprecationWarning, stacklevel=2)
flux_filter = re.findall(r"\((\w+)\)", arg)[0]
if len(flux_filter) == 1 and flux_filter.islower():
flux_filter = flux_filter + "_"
self.joins.append(_Join("allfluxes", _Column("basic", "oid"),
_Column("allfluxes", "oidref")))
self.columns_in_output.append(_Column("allfluxes", flux_filter))
"to see the new ways to interact with SIMBAD's fluxes.", DeprecationWarning, stacklevel=2)
fluxes_to_add.append(re.findall(r"\((\w+)\)", arg)[0])
args.remove(arg)

# casefold args
args = set(map(str.casefold, args))

# output options
output_options = self.list_votable_fields()
# fluxes are case-dependant
fluxes = output_options[output_options["type"] == "filter name"]["name"]
# add fluxes
fluxes_to_add += [flux for flux in args if flux in fluxes]
if fluxes_to_add:
self.joins.append(_Join("allfluxes", _Column("basic", "oid"),
_Column("allfluxes", "oidref")))
for flux in fluxes_to_add:
if len(flux) == 1 and flux.islower():
# the name in the allfluxes view has a trailing underscore. This is not
# the case in the list of filter names, so we homogenize with an alias
self.columns_in_output.append(_Column("allfluxes", flux + "_", flux))
else:
self.columns_in_output.append(_Column("allfluxes", flux))

# casefold args because we allow case difference for every other argument (legacy behavior)
args = set(map(str.casefold, args))
output_options["name"] = list(map(str.casefold, list(output_options["name"])))
basic_columns = output_options[output_options["type"] == "column of basic"]["name"]
all_tables = output_options[output_options["type"] == "table"]["name"]
Expand Down Expand Up @@ -1374,7 +1400,7 @@ def _query(self, top, columns, joins, criteria, from_table="basic",
top = f" TOP {top}" if top != -1 else ""

# columns
input_columns = [f'{column.table}."{column.name}" AS {column.alias}' if column.alias is not None
input_columns = [f'{column.table}."{column.name}" AS "{column.alias}"' if column.alias is not None
else f'{column.table}."{column.name}"' for column in columns]
# remove possible duplicates
unique_columns = []
Expand All @@ -1391,7 +1417,7 @@ def _query(self, top, columns, joins, criteria, from_table="basic",
[unique_joins.append(join) for join in joins if join not in unique_joins]
join = " " + " ".join([(f'{join.join_type} {join.table} ON {join.column_left.table}."'
f'{join.column_left.name}" = {join.column_right.table}."'
f'{join.column_right.name}"') for join in joins])
f'{join.column_right.name}"') for join in unique_joins])

# criteria
if criteria != []:
Expand Down
97 changes: 91 additions & 6 deletions astroquery/simbad/tests/data/simbad_output_options.xml
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Produced with astropy.io.votable version 6.0.0
<!-- Produced with astropy.io.votable version 6.1.0
http://www.astropy.org/ -->
<VOTABLE version="1.4" xmlns="http://www.ivoa.net/xml/VOTable/v1.3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.ivoa.net/xml/VOTable/v1.3 http://www.ivoa.net/xml/VOTable/VOTable-1.4.xsd">
<RESOURCE type="results">
<TABLE ID="result_S1711533763224" name="result_S1711533763224">
<FIELD ID="name" arraysize="*" datatype="char" name="name">
<TABLE ID="result_S1719407661907" name="result_S1719407661907">
<FIELD ID="name" arraysize="*" datatype="char" name="name" ucd="instr.filter">
<DESCRIPTION>
column name
flux filter name
</DESCRIPTION>
</FIELD>
<FIELD ID="description" arraysize="*" datatype="char" name="description">
<FIELD ID="description" arraysize="*" datatype="unicodeChar" name="description" ucd="meta.note;instr.filter">
<DESCRIPTION>
brief description of column
flux filter description
</DESCRIPTION>
</FIELD>
<FIELD ID="type" arraysize="*" datatype="unicodeChar" name="type"/>
Expand Down Expand Up @@ -507,6 +507,91 @@
<TD>all fields related with radial velocity and redshift</TD>
<TD>bundle of basic columns</TD>
</TR>
<TR>
<TD>U</TD>
<TD>Magnitude U</TD>
<TD>filter name</TD>
</TR>
<TR>
<TD>B</TD>
<TD>Magnitude B</TD>
<TD>filter name</TD>
</TR>
<TR>
<TD>V</TD>
<TD>Magnitude V</TD>
<TD>filter name</TD>
</TR>
<TR>
<TD>R</TD>
<TD>Magnitude R</TD>
<TD>filter name</TD>
</TR>
<TR>
<TD>I</TD>
<TD>Magnitude I</TD>
<TD>filter name</TD>
</TR>
<TR>
<TD>J</TD>
<TD>Magnitude J</TD>
<TD>filter name</TD>
</TR>
<TR>
<TD>H</TD>
<TD>Magnitude H</TD>
<TD>filter name</TD>
</TR>
<TR>
<TD>K</TD>
<TD>Magnitude K</TD>
<TD>filter name</TD>
</TR>
<TR>
<TD>u</TD>
<TD>Magnitude SDSS u</TD>
<TD>filter name</TD>
</TR>
<TR>
<TD>g</TD>
<TD>Magnitude SDSS g</TD>
<TD>filter name</TD>
</TR>
<TR>
<TD>r</TD>
<TD>Magnitude SDSS r</TD>
<TD>filter name</TD>
</TR>
<TR>
<TD>i</TD>
<TD>Magnitude SDSS i</TD>
<TD>filter name</TD>
</TR>
<TR>
<TD>z</TD>
<TD>Magnitude SDSS z</TD>
<TD>filter name</TD>
</TR>
<TR>
<TD>G</TD>
<TD>Magnitude Gaia G</TD>
<TD>filter name</TD>
</TR>
<TR>
<TD>F150W</TD>
<TD>JWST NIRCam F150W</TD>
<TD>filter name</TD>
</TR>
<TR>
<TD>F200W</TD>
<TD>JWST NIRCam F200W</TD>
<TD>filter name</TD>
</TR>
<TR>
<TD>F444W</TD>
<TD>JWST NIRCan F444W</TD>
<TD>filter name</TD>
</TR>
</TABLEDATA>
</DATA>
</TABLE>
Expand Down
33 changes: 21 additions & 12 deletions astroquery/simbad/tests/test_simbad.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def test_mocked_simbad():
simbad_instance = simbad.Simbad()
# this mocks the list_votable_fields
options = simbad_instance.list_votable_fields()
assert len(options) > 90
assert len(options) >= 115
# this mocks the hardlimit
assert simbad_instance.hardlimit == 2000000

Expand All @@ -157,16 +157,13 @@ def test_mocked_simbad():
# ----------------------------


@pytest.mark.usefixtures("_mock_basic_columns")
def test_votable_fields_utils(monkeypatch):
monkeypatch.setattr(simbad.SimbadClass, "query_tap",
lambda self, _: Table([["biblio"], ["biblio description"]],
names=["name", "description"],
dtype=["object", "object"]))
@pytest.mark.usefixtures("_mock_simbad_class")
def test_votable_fields_utils():
options = simbad.SimbadClass().list_votable_fields()
assert set(options.group_by("type").groups.keys["type"]) == {"table",
"column of basic",
"bundle of basic columns"}
"bundle of basic columns",
"filter name"}

description = simbad.SimbadClass().get_field_description("velocity")
assert description == 'all fields related with radial velocity and redshift'
Expand Down Expand Up @@ -201,6 +198,7 @@ def test_get_bundle_columns(bundle_name, column):
assert column in simbad.SimbadClass()._get_bundle_columns(bundle_name)


@pytest.mark.usefixtures("_mock_simbad_class")
@pytest.mark.usefixtures("_mock_linked_to_basic")
def test_add_table_to_output(monkeypatch):
# if table = basic, no need to add a join
Expand All @@ -218,10 +216,16 @@ def test_add_table_to_output(monkeypatch):
simbad.core._Column("basic", "oid"),
simbad.core._Column("mesdiameter", "oidref")
) in simbad_instance.joins
assert simbad.core._Column("mesdiameter", "bibcode", '"mesdiameter.bibcode"'
assert simbad.core._Column("mesdiameter", "bibcode", "mesdiameter.bibcode"
) in simbad_instance.columns_in_output
assert simbad.core._Column("mesdiameter", "oidref", '"mesdiameter.oidref"'
assert simbad.core._Column("mesdiameter", "oidref", "mesdiameter.oidref"
) not in simbad_instance.columns_in_output
# add allfluxes to test the special case
monkeypatch.setattr(simbad.SimbadClass, "list_columns", lambda self, _: Table([["U", "u_"]],
names=["column_name"]))
simbad_instance._add_table_to_output("allfluxes")
assert simbad.core._Column("allfluxes", "U") in simbad_instance.columns_in_output
assert simbad.core._Column("allfluxes", "u_", "u") in simbad_instance.columns_in_output


@pytest.mark.usefixtures("_mock_simbad_class")
Expand All @@ -244,6 +248,9 @@ def test_add_votable_fields():
# add a bundle
simbad_instance.add_votable_fields("dimensions")
assert simbad.core._Column("basic", "galdim_majaxis") in simbad_instance.columns_in_output
# add filter name
simbad_instance.add_votable_fields("u")
assert "allfluxes.u_" in simbad_instance.get_votable_fields()
# a column which name has changed should raise a warning but still
# be added under its new name
simbad_instance.columns_in_output = []
Expand All @@ -256,7 +263,9 @@ def test_add_votable_fields():
simbad_instance.add_votable_fields("distance")
# errors are raised for the deprecated fields with options
simbad_instance = simbad.SimbadClass()
with pytest.warns(DeprecationWarning, match=r"The notation \'flux\(X\)\' is deprecated since 0.4.8. *"):
with pytest.raises(ValueError, match=r"The votable fields \'flux_\*\*\*\(filtername\)\' are removed *"):
simbad_instance.add_votable_fields("flux_error(u)")
with pytest.warns(DeprecationWarning, match=r"The notation \'flux\(U\)\' is deprecated since 0.4.8 *"):
simbad_instance.add_votable_fields("flux(u)")
assert "u_" in str(simbad_instance.columns_in_output)
with pytest.raises(ValueError, match="Coordinates conversion and formatting is no longer supported*"):
Expand Down Expand Up @@ -525,7 +534,7 @@ def test_list_linked_tables():
def test_query():
column = simbad.core._Column("basic", "*")
# bare minimum with an alias
expected = 'SELECT basic."main_id" AS my_id FROM basic'
expected = 'SELECT basic."main_id" AS "my_id" FROM basic'
assert simbad.Simbad._query(-1, [simbad.core._Column("basic", "main_id", "my_id")], [],
[], get_query_payload=True)["QUERY"] == expected
# with top
Expand Down
13 changes: 10 additions & 3 deletions astroquery/simbad/tests/test_simbad_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def test_query_tap(self):
" c 3")
assert expect == str(result)
# Test query_tap raised errors
with pytest.raises(DALOverflowWarning, match="Partial result set *"):
with pytest.warns(DALOverflowWarning, match="Partial result set *"):
truncated_result = Simbad.query_tap("SELECT * from basic", maxrec=2)
assert len(truncated_result) == 2
with pytest.raises(ValueError, match="The maximum number of records cannot exceed 2000000."):
Expand Down Expand Up @@ -173,12 +173,12 @@ def test_add_bundle_to_output(self):
assert len(simbad_instance.columns_in_output) == 8
assert _Column("basic", "galdim_majaxis") in simbad_instance.columns_in_output

def test_add_table_to_output(self):
def test_add_votable_fields(self):
simbad_instance = Simbad()
# empty before the test
simbad_instance.columns_in_output = []
simbad_instance.add_votable_fields("otypes")
assert _Column("otypes", "otype", '"otypes.otype"') in simbad_instance.columns_in_output
assert _Column("otypes", "otype", 'otypes.otype') in simbad_instance.columns_in_output
# tables also require a join
assert _Join("otypes",
_Column("basic", "oid"),
Expand All @@ -191,3 +191,10 @@ def test_add_table_to_output(self):
# mixed columns bundles and tables
simbad_instance.add_votable_fields("flux", "velocity", "update_date")
assert len(simbad_instance.columns_in_output) == 19

# add fluxes by their filter names
simbad_instance = Simbad()
simbad_instance.add_votable_fields("U", "V")
simbad_instance.add_votable_fields("u")
result = simbad_instance.query_object("HD 147933")
assert all(filtername in result.colnames for filtername in {"u", "U", "V"})
Loading

0 comments on commit 1be9420

Please sign in to comment.