use deepdiff instead of asdf.commands.diff for output and truth file comparisons (#868)
spacetelescope · Sep 20, 2023 · b7413cc · b7413cc
2 parents 0bf01f8 + 9f703eb
commit b7413cc
Show file tree

Hide file tree

Showing 16 changed files with 276 additions and 41 deletions.
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -10,6 +10,10 @@ general
 
 - Fix regression tests for PSF fitting methods [#872]
 
+- Fix the regression test ``compare_asdf`` function by replacing the use of
+  ``asdf.commands.diff`` with ``deepdiff``, and add ``deepdiff`` as
+  a test dependency [#868]
+
 ramp_fitting
 ------------
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -53,6 +53,7 @@ test = [
     'pytest >=4.6.0',
     'pytest-astropy',
     'metrics_logger >= 0.1.0',
+    'deepdiff',
 ]
 dev = [
     'romancal[docs,test]',

diff --git a/romancal/regtest/conftest.py b/romancal/regtest/conftest.py
@@ -203,10 +203,16 @@ def rtdata_module(artifactory_repos, envopt, request, jail):
 @pytest.fixture
 def ignore_asdf_paths():
+    """Return the ``ignore`` keyword argument (as a dict) for ``compare_asdf``."""
     ignore_attr = [
-        "meta.[date, filename]",
+        # generic asdf stuff that will contain program version numbers
+        # and other things that will almost certainly change in every case
         "asdf_library",
         "history",
-        "cal_logs",
+        # roman-specific stuff to ignore
+        "roman.cal_logs",
+        "roman.meta.date",
+        # roman.meta.filename is used by the ExposurePipeline so should likely
+        # not be ignored here
+        # "roman.meta.filename",
     ]
 
     return {"ignore": ignore_attr}

diff --git a/romancal/regtest/regtestdata.py b/romancal/regtest/regtestdata.py
@@ -5,18 +5,23 @@
 import sys
 from difflib import unified_diff
 from glob import glob as _sys_glob
-from io import StringIO
 from pathlib import Path
 
 import asdf
+import astropy.time
+import deepdiff
+import gwcs
+import numpy as np
 import requests
-from asdf.commands.diff import diff as asdf_diff
+from astropy.units import Quantity
 from ci_watson.artifactory_helpers import (
     BigdataError,
     check_url,
     get_bigdata,
     get_bigdata_root,
 )
+from deepdiff.operator import BaseOperator
+from gwcs.wcstools import grid_from_bounding_box
 
 # from romancal.lib.suffix import replace_suffix
 from romancal.stpipe import RomanStep
@@ -525,8 +530,176 @@ def _data_glob_url(*url_parts, root=None):
     return url_paths
 
 
-def compare_asdf(result, truth, **kwargs):
-    f = StringIO()
-    asdf_diff([result, truth], minimal=False, iostream=f, **kwargs)
-    if f.getvalue():
-        f.getvalue()
+class NDArrayTypeOperator(BaseOperator):
+    """
+    deepdiff custom operator that compares array-like nodes with a tolerance.
+
+    Shape, dtype and (when both inputs are `astropy.units.Quantity`) unit
+    mismatches are reported without comparing values. Otherwise the values
+    are compared with `numpy.isclose` and, when any element differs, summary
+    statistics are reported under the ``arrays_differ`` report key.
+
+    Parameters
+    ----------
+
+    rtol : float
+        rtol argument passed to `numpy.isclose`
+
+    atol : float
+        atol argument passed to `numpy.isclose`
+
+    equal_nan : bool
+        equal_nan argument passed to `numpy.isclose`
+
+    **kwargs
+        Forwarded unchanged to ``BaseOperator.__init__`` (e.g. ``types``)
+    """
+
+    def __init__(self, rtol=1e-05, atol=1e-08, equal_nan=True, **kwargs):
+        super().__init__(**kwargs)
+        self.rtol = rtol
+        self.atol = atol
+        self.equal_nan = equal_nan
+
+    def give_up_diffing(self, level, diff_instance):
+        """
+        Compare ``level.t1`` and ``level.t2`` and report any differences.
+
+        Differences are recorded on ``diff_instance`` with
+        ``custom_report_result("arrays_differ", level, meta)``.
+
+        Returns
+        -------
+
+        bool
+            Always True, so that this operator is the only comparison
+            applied to the matched nodes.
+        """
+        a, b = level.t1, level.t2
+        meta = {}
+        if a.shape != b.shape:
+            meta["shapes"] = [a.shape, b.shape]
+        if a.dtype != b.dtype:
+            meta["dtypes"] = [a.dtype, b.dtype]
+        if isinstance(a, Quantity) and isinstance(b, Quantity):
+            if a.unit != b.unit:
+                meta["units"] = [a.unit, b.unit]
+        if not meta:  # only compare if shapes, dtypes and units match
+            # evaluate the element-wise comparison once and reuse it for
+            # the overall check and for counting the differing elements
+            close = np.isclose(
+                a, b, rtol=self.rtol, atol=self.atol, equal_nan=self.equal_nan
+            )
+            if not np.all(close):
+                abs_diff = np.abs(a - b)
+                index = np.unravel_index(np.nanargmax(abs_diff), a.shape)
+                meta["worst_abs_diff"] = {
+                    "index": index,
+                    "value": abs_diff[index],
+                }
+                with np.errstate(invalid="ignore", divide="ignore"):
+                    # 0 / 0 == nan and produces an 'invalid' error
+                    # 1 / 0 == inf and produces a 'divide' error
+                    # ignore these here for computing the fractional diff
+                    # NOTE(review): this is the ratio |a / b|, not
+                    # |a - b| / |b| -- confirm which one is intended
+                    fractional_diff = np.abs(a / b)
+                index = np.unravel_index(np.nanargmax(fractional_diff), a.shape)
+                meta["worst_fractional_diff"] = {
+                    "index": index,
+                    "value": fractional_diff[index],
+                }
+                meta["abs_diff"] = np.nansum(abs_diff)
+                # number of elements that differ, i.e. those that are
+                # NOT within tolerance of each other
+                meta["n_diffs"] = np.count_nonzero(np.logical_not(close))
+        if meta:
+            diff_instance.custom_report_result("arrays_differ", level, meta)
+        return True
+
+
+class TimeOperator(BaseOperator):
+    """deepdiff custom operator that reports unequal times and their difference."""
+
+    def give_up_diffing(self, level, diff_instance):
+        """
+        Report ``level.t1 - level.t2`` under ``times_differ`` when the two
+        values compare unequal. Always returns True.
+        """
+        if level.t1 != level.t2:
+            diff_instance.custom_report_result(
+                "times_differ",
+                level,
+                {
+                    "difference": level.t1 - level.t2,
+                },
+            )
+        return True
+
+
+def _wcs_to_ra_dec(wcs):
+    """
+    Evaluate ``wcs`` on the grid built from its bounding box.
+
+    Returns whatever ``wcs(x, y)`` returns; callers unpack it as (ra, dec).
+    NOTE(review): assumes ``wcs.bounding_box`` is set -- confirm.
+    """
+    x, y = grid_from_bounding_box(wcs.bounding_box)
+    return wcs(x, y)
+
+
+class WCSOperator(BaseOperator):
+    """deepdiff custom operator that compares wcs objects by their outputs."""
+
+    def give_up_diffing(self, level, diff_instance):
+        """
+        Report, under ``wcs_differ``, the absolute ra and/or dec differences
+        for each output that fails `numpy.allclose` (default tolerances).
+        Always returns True.
+        """
+        # for comparing wcs instances this function evaluates
+        # each wcs and compares the resulting ra and dec outputs
+        # TODO should we compare the bounding boxes?
+        ra_a, dec_a = _wcs_to_ra_dec(level.t1)
+        ra_b, dec_b = _wcs_to_ra_dec(level.t2)
+        meta = {}
+        for name, a, b in [("ra", ra_a, ra_b), ("dec", dec_a, dec_b)]:
+            # TODO do we want to do something fancier than allclose?
+            if not np.allclose(a, b):
+                meta[name] = {
+                    "abs_diff": np.abs(a - b),
+                }
+        if meta:
+            diff_instance.custom_report_result(
+                "wcs_differ",
+                level,
+                meta,
+            )
+        return True
+
+
+class DiffResult:
+    """Wrapper around the diff object returned by `compare_asdf`."""
+
+    def __init__(self, diff):
+        # diff: the comparison output; an empty (falsy) diff means no differences
+        self.diff = diff
+
+    @property
+    def identical(self):
+        """True when the wrapped diff is empty (falsy)."""
+        return not self.diff
+
+    def report(self, **kwargs):
+        """Return the wrapped diff pretty-printed as a string (kwargs unused)."""
+        return pprint.pformat(self.diff)
+
+
+def compare_asdf(result, truth, ignore=None, rtol=1e-05, atol=1e-08, equal_nan=True):
+    """
+    Compare 2 asdf files: result and truth. Note that this comparison is
+    asymmetric (swapping result and truth will give a different result).
+
+    Parameters
+    ----------
+
+    result : str
+        Filename of result asdf file
+
+    truth : str
+        Filename of truth asdf file
+
+    ignore : list
+        List of tree node paths to ignore during the comparison. Each path
+        is a dot-separated string of keys (e.g. ``roman.meta.date``)
+
+    rtol : float
+        rtol argument passed to `numpy.allclose`
+
+    atol : float
+        atol argument passed to `numpy.allclose`
+
+    equal_nan : bool
+        Ignore nan inequality
+
+    Returns
+    -------
+
+    diff_result : DiffResult
+        result of the comparison
+    """
+    exclude_paths = []
+    ignore = ignore or []
+    for path in ignore:
+        # convert "a.b" into the deepdiff path "root['a']['b']"
+        key_path = "".join([f"['{k}']" for k in path.split(".")])
+        exclude_paths.append(f"root{key_path}")
+    operators = [
+        NDArrayTypeOperator(
+            rtol, atol, equal_nan, types=[asdf.tags.core.NDArrayType, np.ndarray]
+        ),
+        TimeOperator(types=[astropy.time.Time]),
+        WCSOperator(types=[gwcs.WCS]),
+    ]
+    # warnings can be seen in regtest runs which indicate
+    # that ddtrace logs are evaluated at times after the below
+    # with statement exits resulting in access attempts on the
+    # closed asdf file. To try and avoid that we disable
+    # lazy loading and memory mapping
+    open_kwargs = {
+        "lazy_load": False,
+        "copy_arrays": True,
+    }
+    with asdf.open(result, **open_kwargs) as af0, asdf.open(
+        truth, **open_kwargs
+    ) as af1:
+        # swap the inputs here so DeepDiff(truth, result)
+        # this will create output with 'new_value' referring to
+        # the value in the result and 'old_value' referring to the truth
+        diff = deepdiff.DeepDiff(
+            af1.tree,
+            af0.tree,
+            ignore_nan_inequality=equal_nan,
+            custom_operators=operators,
+            exclude_paths=exclude_paths,
+        )
+        # the conversion between NDArrayType and ndarray adds a bunch
+        # of type changes, ignore these for now.
+        # TODO Ideally we could find a way to remove just the NDArrayType ones
+        if "type_changes" in diff:
+            del diff["type_changes"]
+        return DiffResult(diff)
diff --git a/romancal/regtest/test_dark_current.py b/romancal/regtest/test_dark_current.py
@@ -20,7 +20,8 @@ def test_dark_current_subtraction_step(rtdata, ignore_asdf_paths):
     output = "r0000101001001001001_01101_0001_WFI01_darkcurrent.asdf"
     rtdata.output = output
     rtdata.get_truth(f"truth/WFI/image/{output}")
-    assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
+    diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
+    assert diff.identical, diff.report()
 
 
 @pytest.mark.bigdata
@@ -40,7 +41,8 @@ def test_dark_current_outfile_step(rtdata, ignore_asdf_paths):
     output = "Test_darkcurrent.asdf"
     rtdata.output = output
     rtdata.get_truth(f"truth/WFI/image/{output}")
-    assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
+    diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
+    assert diff.identical, diff.report()
 
 
 @pytest.mark.bigdata
@@ -61,7 +63,8 @@ def test_dark_current_outfile_suffix(rtdata, ignore_asdf_paths):
     output = "Test_darkcurrent.asdf"
     rtdata.output = output
     rtdata.get_truth(f"truth/WFI/image/{output}")
-    assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
+    diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
+    assert diff.identical, diff.report()
 
 
 @pytest.mark.bigdata
@@ -83,4 +86,5 @@ def test_dark_current_output(rtdata, ignore_asdf_paths):
     output = "r0000101001001001001_01101_0001_WFI01_darkcurrent.asdf"
     rtdata.output = output
     rtdata.get_truth(f"truth/WFI/image/{output}")
-    assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
+    diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
+    assert diff.identical, diff.report()
diff --git a/romancal/regtest/test_jump_det.py b/romancal/regtest/test_jump_det.py
@@ -28,4 +28,5 @@ def test_jump_detection_step(rtdata, ignore_asdf_paths):
     output = "r0000101001001001001_01101_0001_WFI01_jump.asdf"
     rtdata.output = output
     rtdata.get_truth(f"truth/WFI/image/{output}")
-    assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
+    diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
+    assert diff.identical, diff.report()
diff --git a/romancal/regtest/test_linearity.py b/romancal/regtest/test_linearity.py
@@ -18,7 +18,8 @@ def test_linearity_step(rtdata, ignore_asdf_paths):
     output = "r0000101001001001001_01101_0001_WFI01_linearity.asdf"
     rtdata.output = output
     rtdata.get_truth(f"truth/WFI/image/{output}")
-    assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
+    diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
+    assert diff.identical, diff.report()
 
 
 @pytest.mark.bigdata
@@ -34,4 +35,5 @@ def test_linearity_outfile_step(rtdata, ignore_asdf_paths):
     output = "Test_linearity.asdf"
     rtdata.output = output
     rtdata.get_truth(f"truth/WFI/image/{output}")
-    assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
+    diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
+    assert diff.identical, diff.report()
diff --git a/romancal/regtest/test_ramp_fitting.py b/romancal/regtest/test_ramp_fitting.py
@@ -24,9 +24,11 @@ def test_ramp_fitting_step(rtdata, ignore_asdf_paths):
     output = "r0000101001001001001_01101_0001_WFI01_rampfit.asdf"
     rtdata.output = output
     rtdata.get_truth(f"truth/WFI/image/{output}")
-    assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
+    diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
+    assert diff.identical, diff.report()
 
     output = "rampfit_opt_fitopt.asdf"
     rtdata.output = output
     rtdata.get_truth(f"truth/WFI/image/{output}")
-    assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
+    diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
+    assert diff.identical, diff.report()
diff --git a/romancal/regtest/test_refpix.py b/romancal/regtest/test_refpix.py
@@ -21,4 +21,5 @@ def test_refpix_step(rtdata, ignore_asdf_paths):
     rtdata.output = output
     rtdata.get_truth(f"truth/WFI/image/{output}")
 
-    assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
+    diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
+    assert diff.identical, diff.report()
diff --git a/romancal/regtest/test_regtestdata.py b/romancal/regtest/test_regtestdata.py
@@ -0,0 +1,27 @@
+from roman_datamodels import datamodels as rdm
+from roman_datamodels import maker_utils
+
+from romancal.regtest.regtestdata import compare_asdf
+
+
+def test_compare_asdf_identical(tmp_path):
+    """The same model saved to two files must compare as identical."""
+    fn0 = tmp_path / "test0.asdf"
+    fn1 = tmp_path / "test1.asdf"
+    l2 = rdm.ImageModel(maker_utils.mk_level2_image(shape=(100, 100)))
+    l2.save(fn0)
+    l2.save(fn1)
+    diff = compare_asdf(fn0, fn1)
+    assert diff.identical, diff.report()
+
+
+def test_compare_asdf_differ(tmp_path):
+    """Changing the data between saves must be reported under arrays_differ."""
+    fn0 = tmp_path / "test0.asdf"
+    fn1 = tmp_path / "test1.asdf"
+    l2 = rdm.ImageModel(maker_utils.mk_level2_image(shape=(100, 100)))
+    l2.save(fn0)
+    # offset every data value by one (in the data's own unit) before re-saving
+    l2.data += 1 * l2.data.unit
+    l2.save(fn1)
+    diff = compare_asdf(fn0, fn1)
+    assert not diff.identical, diff.report()
+    assert "arrays_differ" in diff.diff
+    assert "root['roman']['data']" in diff.diff["arrays_differ"]
diff --git a/romancal/regtest/test_tweakreg.py b/romancal/regtest/test_tweakreg.py
@@ -93,8 +93,8 @@ def test_tweakreg(rtdata, ignore_asdf_paths, tmp_path):
     )
     assert "v2v3corr" in tweakreg_out.meta.wcs.available_frames
 
+    diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
     step.log.info(
-        "DMS280 MSG: Was the proper TweakReg data produced?"
-        f" : {(compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None)}"
+        f"DMS280 MSG: Was the proper TweakReg data produced? : {diff.identical}"
     )
-    assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
+    assert diff.identical, diff.report()
diff --git a/romancal/regtest/test_wfi_dq_init.py b/romancal/regtest/test_wfi_dq_init.py
@@ -58,12 +58,13 @@ def test_dq_init_image_step(rtdata, ignore_asdf_paths):
     assert "roman.pixeldq" in ramp_out.to_flat_dict()
 
     rtdata.get_truth(f"truth/WFI/image/{output}")
+    diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
     step.log.info(
         "DMS25 MSG: Was the proper data quality array initialized"
         " for the ramp data produced? : "
-        f"{(compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None)}"
+        f"{diff.identical}"
     )
-    assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
+    assert diff.identical, diff.report()
 
 
 @metrics_logger("DMS26")
@@ -113,9 +114,10 @@ def test_dq_init_grism_step(rtdata, ignore_asdf_paths):
     assert "roman.pixeldq" in ramp_out.to_flat_dict()
 
     rtdata.get_truth(f"truth/WFI/grism/{output}")
+    diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
     step.log.info(
         "DMS26 MSG: Was proper data quality initialized "
         "ramp data produced? : "
-        f"{(compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None)}"
+        f"{diff.identical}"
     )
-    assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
+    assert diff.identical, diff.report()