spacetelescope · bmorris3 · Nov 14, 2023 · Nov 9, 2023 · Nov 10, 2023
@@ -72,6 +72,13 @@ scripts
 
 - fixed ``asn_from_list`` script [#972]
 
+source_detection
+----------------
+
+- Support for PSF fitting (optional) for accurate centroids. [#841]
+
+- Save source catalog to a structured array. [#987]
+
 stpipe
 ------
 
@@ -83,11 +90,6 @@ tweakreg
 - Fix a bug due to which source catalog may contain sources
   outside of the bounding box. [#947]
 
-source_detection
-----------------
-
-- Support for PSF fitting (optional) for accurate centroids. [#841]
-
 0.12.0 (2023-08-18)
 ===================
 

@@ -103,3 +103,24 @@ def __exit__(self, et, ev, tb):
         if self.handler and self.close:
             self.handler.close()
         # implicit return of None => don't swallow exceptions
+
+
+def recarray_to_ndarray(x, to_dtype="<f8"):
+    """
+    Convert a structured array to a 2D ndarray.
+
+    Parameters
+    ----------
+    x : np.record
+        Structured array
+    to_dtype : str
+        Cast all columns in `x` to this dtype in the output ndarray.
+
+    Returns
+    -------
+    array : np.ndarray
+        Numpy array (without labeled columns).
+    """
+    names = x.dtype.names
+    astype = [(name, to_dtype) for name in names]
+    return np.asarray(x.astype(astype).view(to_dtype).reshape((-1, len(names))))
@@ -1,8 +1,10 @@
-"""Test basic utils bytes2human"""
+"""Test basic utils"""
 
+import numpy as np
 import pytest
+from astropy.table import Table
 
-from romancal.lib.basic_utils import bytes2human
+from romancal.lib.basic_utils import bytes2human, recarray_to_ndarray
 
 test_data = [
     (1000, "1000B"),
@@ -17,3 +19,21 @@ def test_bytes2human(input_data, result):
     """test the basic conversion"""
 
     assert bytes2human(input_data) == result
+
+
+def test_structured_array_utils():
+    arrays = [np.arange(0, 10), np.arange(10, 20), np.arange(30, 40)]
+    names = "a, b, c"
+
+    recarr0 = np.core.records.fromarrays(
+        arrays, names=names, formats=[arr.dtype for arr in arrays]
+    )
+    round_tripped = recarray_to_ndarray(recarr0)
+    assert np.all(round_tripped == np.column_stack(arrays).astype("<f8"))
+
+    astropy_table = Table(recarr0)
+    assert np.all(
+        np.array(arrays) == np.array([col.data for col in astropy_table.itercols()])
+    )
+
+    assert np.all(astropy_table.as_array() == recarr0)
@@ -183,31 +183,7 @@ def process(self, input):
                     exclude_out_of_bounds=True,
                 )
 
-                # attach source catalog to output model as array in
-                # meta.source_detection the table will be stored as a
-                # 1D array with the four columns concatenated, in order,
-                # with units attached
-                catalog_as_array = np.array(
-                    [
-                        psf_photometry_table["id"].value,
-                        psf_photometry_table["x_fit"].value,
-                        psf_photometry_table["y_fit"].value,
-                        psf_photometry_table["flux_fit"].value,
-                    ]
-                )
-            else:
-                # attach source catalog to output model as array in
-                # meta.source_detection the table will be stored as a
-                # 1D array with the four columns concatenated, in order,
-                # with units attached
-                catalog_as_array = np.array(
-                    [
-                        catalog["id"].value,
-                        catalog["xcentroid"].value,
-                        catalog["ycentroid"].value,
-                        catalog["flux"].value,
-                    ]
-                )
+            catalog_as_recarray = catalog.as_array()
 
             # create meta.source detection section in file
             # if save_catalogs is True, this will be updated with the
@@ -228,7 +204,7 @@ def process(self, input):
                 log.info(f"Saving catalog to file: {cat_filename}.")
 
                 if self.output_cat_filetype == "asdf":
-                    tree = {"tweakreg_catalog": catalog_as_array}
+                    tree = {"tweakreg_catalog": catalog_as_recarray}
                     ff = AsdfFile(tree)
                     ff.write_to(cat_filename)
                 else:
@@ -238,15 +214,26 @@ def process(self, input):
                     "tweakreg_catalog_name"
                 ] = cat_filename
             else:
-                # only attach catalog to file if its being passed to the next step
-                # and save_catalogs is false, since it is not in the schema
-                input_model.meta.source_detection["tweakreg_catalog"] = catalog_as_array
-
                 if self.fit_psf:
+                    # PSF photometry centroid results are stored in an astropy table
+                    # in columns "x_fit" and "y_fit", which we'll rename for
+                    # compatibility with tweakreg:
+                    catalog = psf_photometry_table.copy()
+
+                    # rename the PSF photometry table's columns to match the
+                    # expectated columns in tweakreg:
+                    for old_name, new_name in zip(
+                        ["x_fit", "y_fit"], ["xcentroid", "ycentroid"]
+                    ):
+                        catalog.rename_column(old_name, new_name)
+
+                    input_model.meta.source_detection["psf_catalog"] = catalog
+                else:
+                    # only attach catalog to file if its being passed to the next step
+                    # and save_catalogs is false, since it is not in the schema
                     input_model.meta.source_detection[
-                        "psf_catalog"
-                    ] = psf_photometry_table
-
+                        "tweakreg_catalog"
+                    ] = catalog_as_recarray
             input_model.meta.cal_step["source_detection"] = "COMPLETE"
 
         # just pass input model to next step - catalog is stored in meta

@@ -8,6 +8,7 @@
 import pytest
 from astropy import units as u
 
+from romancal.lib.basic_utils import recarray_to_ndarray
 from romancal.lib.tests.test_psf import add_sources, setup_inputs
 from romancal.source_detection import SourceDetectionStep
 
@@ -38,7 +39,9 @@ def test_dao_vs_psf(self, seed=0):
         source_detect.scalar_threshold = 100
         source_detect.peakmax = None
         dao_result = source_detect.process(image_model)
-        idx, x_dao, y_dao, amp_dao = dao_result.meta.source_detection.tweakreg_catalog
+        idx, x_dao, y_dao, amp_dao = recarray_to_ndarray(
+            dao_result.meta.source_detection.tweakreg_catalog
+        ).T
 
         # check that all injected targets are found by DAO:
         assert len(x_dao) == len(x_true)
@@ -47,7 +50,7 @@ def test_dao_vs_psf(self, seed=0):
         psf_result = source_detect.process(image_model)
         psf_catalog = psf_result.meta.source_detection.psf_catalog
 
-        extract_columns = ["x_fit", "x_err", "y_fit", "y_err", "flux_fit"]
+        extract_columns = ["xcentroid", "x_err", "ycentroid", "y_err", "flux_fit"]
         x_psf, x_err, y_psf, y_err, amp_psf = psf_catalog[extract_columns].itercols()
 
         # check that typical PSF centroids are more accurate than DAO centroids:

@@ -381,7 +381,7 @@ def create_base_image_source_catalog(
     t.add_column([i for i in range(len(t))], name="id", index=0)
     t.add_column([np.float64(i) for i in range(len(t))], name="flux")
     t.rename_columns(["x", "y"], ["xcentroid", "ycentroid"])
-    return t.as_array().T
+    return t.as_array()
 
 
 def add_tweakreg_catalog_attribute(
@@ -441,7 +441,7 @@ def add_tweakreg_catalog_attribute(
             tmp_path, tweakreg_catalog_filename
         )
     else:
-        # SourceDetectionStep attaches the catalog data as a 4D numpy array
+        # SourceDetectionStep attaches the catalog data as a structured array
         input_dm.meta.source_detection["tweakreg_catalog"] = source_catalog
 
 
@@ -1111,8 +1111,8 @@ def test_imodel2wcsim_valid_column_names(tmp_path, base_image, column_names):
     imcats = list(map(step._imodel2wcsim, g))
 
     assert all(x.meta["image_model"] == y for x, y in zip(imcats, [img_1, img_2]))
-    assert all(
-        all(x.meta["catalog"] == y.meta.tweakreg_catalog)
+    assert np.all(
+        x.meta["catalog"] == y.meta.tweakreg_catalog
         for x, y in zip(imcats, [img_1, img_2])
     )
 

@@ -174,11 +174,8 @@ def process(self, input):
                     image_model.meta.source_detection, "tweakreg_catalog_name"
                 )
                 if is_tweakreg_catalog_present:
-                    # read catalog in 4D numpy array format
-                    catalog = Table(
-                        data=image_model.meta.source_detection.tweakreg_catalog.T,
-                        names=("id", "xcentroid", "ycentroid", "flux"),
-                    )
+                    # read catalog from structured array
+                    catalog = Table(image_model.meta.source_detection.tweakreg_catalog)
                 elif is_tweakreg_catalog_name_present:
                     catalog = Table.read(
                         image_model.meta.source_detection.tweakreg_catalog_name,
@@ -245,7 +242,7 @@ def process(self, input):
                     )
 
             # set meta.tweakreg_catalog
-            image_model.meta["tweakreg_catalog"] = catalog
+            image_model.meta["tweakreg_catalog"] = catalog.as_array()
 
             nsources = len(catalog)
             if nsources == 0:
@@ -556,6 +553,9 @@ def _imodel2wcsim(self, image_model):
             if isinstance(catalog, str):
                 # a string with the name of the catalog was provided
                 catalog = Table.read(catalog, format=catalog_format)
+            else:
+                # catalog is a structured array, convert to astropy table:
+                catalog = Table(catalog)
 
             catalog.meta["name"] = (
                 str(catalog) if isinstance(catalog, str) else model_name