Skip to content

Commit

Permalink
use deepdiff instead of asdf.commands.diff for output and truth file comparisons (#868)
Browse files Browse the repository at this point in the history
  • Loading branch information
braingram authored Sep 20, 2023
2 parents 0bf01f8 + 9f703eb commit b7413cc
Show file tree
Hide file tree
Showing 16 changed files with 276 additions and 41 deletions.
4 changes: 4 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ general

- Fix regression tests for PSF fitting methods [#872]

- Fix the regression test ``compare_asdf`` function by replacing the use of
``asdf.commands.diff`` with ``deepdiff``, and add ``deepdiff`` as
a test dependency [#868]

ramp_fitting
------------

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ test = [
'pytest >=4.6.0',
'pytest-astropy',
'metrics_logger >= 0.1.0',
'deepdiff',
]
dev = [
'romancal[docs,test]',
Expand Down
10 changes: 8 additions & 2 deletions romancal/regtest/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,16 @@ def rtdata_module(artifactory_repos, envopt, request, jail):
@pytest.fixture
def ignore_asdf_paths():
    """
    Provide the keyword arguments that tell ``compare_asdf`` what to skip.

    Returns
    -------
    dict
        A single-entry mapping ``{"ignore": [...]}`` meant to be expanded
        with ``**`` into a ``compare_asdf`` call.
    """
    return {
        "ignore": [
            "meta.[date, filename]",
            # generic asdf entries: these hold program version numbers and
            # other values that will almost certainly change on every run
            "asdf_library",
            "history",
            "cal_logs",
            # roman-specific entries to ignore
            "roman.cal_logs",
            "roman.meta.date",
            # NOTE: "roman.meta.filename" is deliberately left out of this
            # list; it is used by the ExposurePipeline so it should likely
            # still be compared.
        ]
    }
Expand Down
187 changes: 180 additions & 7 deletions romancal/regtest/regtestdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,23 @@
import sys
from difflib import unified_diff
from glob import glob as _sys_glob
from io import StringIO
from pathlib import Path

import asdf
import astropy.time
import deepdiff
import gwcs
import numpy as np
import requests
from asdf.commands.diff import diff as asdf_diff
from astropy.units import Quantity
from ci_watson.artifactory_helpers import (
BigdataError,
check_url,
get_bigdata,
get_bigdata_root,
)
from deepdiff.operator import BaseOperator
from gwcs.wcstools import grid_from_bounding_box

# from romancal.lib.suffix import replace_suffix
from romancal.stpipe import RomanStep
Expand Down Expand Up @@ -525,8 +530,176 @@ def _data_glob_url(*url_parts, root=None):
return url_paths


def compare_asdf(result, truth, **kwargs):
    """
    Compare two asdf files using ``asdf.commands.diff``.

    Parameters
    ----------
    result : str
        Filename of result asdf file
    truth : str
        Filename of truth asdf file
    **kwargs
        Extra keyword arguments forwarded to ``asdf.commands.diff.diff``
        (for example ``ignore``)

    Returns
    -------
    str or None
        The text written by the diff, or None when the diff wrote nothing.
    """
    f = StringIO()
    asdf_diff([result, truth], minimal=False, iostream=f, **kwargs)
    # Hand the captured report back to the caller. It used to be read and
    # then thrown away, so the function returned None unconditionally and a
    # caller asserting ``compare_asdf(...) is None`` could never fail.
    return f.getvalue() or None
class NDArrayTypeOperator(BaseOperator):
    """
    DeepDiff custom operator that compares array nodes numerically.

    Shapes, dtypes and (for two ``Quantity`` inputs) units are compared
    first.  Only when all of those agree are the values compared
    element-wise with `numpy.isclose`.  Any mismatch is reported under the
    ``"arrays_differ"`` key of the diff.

    Parameters
    ----------
    rtol : float
        rtol argument passed to `numpy.isclose`
    atol : float
        atol argument passed to `numpy.isclose`
    equal_nan : bool
        equal_nan argument passed to `numpy.isclose`
    **kwargs
        Forwarded unchanged to ``BaseOperator.__init__`` (e.g. ``types``)
    """

    def __init__(self, rtol=1e-05, atol=1e-08, equal_nan=True, **kwargs):
        super().__init__(**kwargs)
        self.rtol = rtol
        self.atol = atol
        self.equal_nan = equal_nan

    @staticmethod
    def _worst(values):
        """Return index and value of the largest non-NaN entry, or None."""
        # nanargmax raises ValueError when every entry is NaN; a diagnostic
        # helper should not abort the whole comparison in that case.
        if np.all(np.isnan(values)):
            return None
        index = np.unravel_index(np.nanargmax(values), np.shape(values))
        return {"index": index, "value": values[index]}

    def give_up_diffing(self, level, diff_instance):
        """
        Compare ``level.t1`` with ``level.t2`` and report any difference.

        Always returns True so that DeepDiff treats the pair as handled by
        this operator (see the DeepDiff custom operator documentation).
        """
        a, b = level.t1, level.t2
        meta = {}
        if a.shape != b.shape:
            meta["shapes"] = [a.shape, b.shape]
        if a.dtype != b.dtype:
            meta["dtypes"] = [a.dtype, b.dtype]
        if isinstance(a, Quantity) and isinstance(b, Quantity):
            if a.unit != b.unit:
                meta["units"] = [a.unit, b.unit]
        if not meta:  # only compare values if shapes, dtypes and units match
            # evaluate the element-wise closeness once and reuse it for both
            # the overall verdict and the count of differing elements
            close = np.isclose(
                a, b, rtol=self.rtol, atol=self.atol, equal_nan=self.equal_nan
            )
            if not np.all(close):
                abs_diff = np.abs(a - b)
                worst_abs = self._worst(abs_diff)
                if worst_abs is not None:
                    meta["worst_abs_diff"] = worst_abs
                with np.errstate(invalid="ignore", divide="ignore"):
                    # 0 / 0 == nan and produces an 'invalid' error
                    # 1 / 0 == inf and produces a 'divide' error
                    # ignore these here for computing the fractional diff.
                    # The difference is taken relative to b, the same
                    # reference numpy.isclose uses for rtol.
                    fractional_diff = abs_diff / np.abs(b)
                worst_fractional = self._worst(fractional_diff)
                if worst_fractional is not None:
                    meta["worst_fractional_diff"] = worst_fractional
                meta["abs_diff"] = np.nansum(abs_diff)
                # count the elements that are NOT close
                meta["n_diffs"] = np.count_nonzero(~close)
        if meta:
            diff_instance.custom_report_result("arrays_differ", level, meta)
        return True


class TimeOperator(BaseOperator):
    """
    DeepDiff custom operator that compares two time values directly.

    Unequal values are reported under the ``"times_differ"`` key together
    with their difference (first minus second).
    """

    def give_up_diffing(self, level, diff_instance):
        """Report ``level.t1 - level.t2`` when the two differ; return True."""
        first, second = level.t1, level.t2
        if first != second:
            report = {"difference": first - second}
            diff_instance.custom_report_result("times_differ", level, report)
        return True


def _wcs_to_ra_dec(wcs):
    """
    Evaluate ``wcs`` on the grid built from its own bounding box.

    The grid comes from ``grid_from_bounding_box(wcs.bounding_box)`` and
    its two components are passed to ``wcs`` as the two inputs.  The
    return value is whatever ``wcs(x, y)`` produces; callers in this
    module unpack it as ``(ra, dec)``.
    """
    grid_x, grid_y = grid_from_bounding_box(wcs.bounding_box)
    sky = wcs(grid_x, grid_y)
    return sky


class WCSOperator(BaseOperator):
    """
    DeepDiff custom operator that compares two wcs objects by evaluation.

    Each wcs is evaluated with ``_wcs_to_ra_dec`` and the resulting ra and
    dec arrays are compared with `numpy.allclose` (default tolerances).
    Coordinates that are not close are reported under ``"wcs_differ"``.
    """

    def give_up_diffing(self, level, diff_instance):
        """Report the absolute ra/dec differences, if any; return True."""
        # TODO should we compare the bounding boxes?
        first_ra, first_dec = _wcs_to_ra_dec(level.t1)
        second_ra, second_dec = _wcs_to_ra_dec(level.t2)
        coordinates = {
            "ra": (first_ra, second_ra),
            "dec": (first_dec, second_dec),
        }
        # TODO do we want to do something fancier than allclose?
        meta = {
            name: {"abs_diff": np.abs(first - second)}
            for name, (first, second) in coordinates.items()
            if not np.allclose(first, second)
        }
        if meta:
            diff_instance.custom_report_result("wcs_differ", level, meta)
        return True


class DiffResult:
    """
    Thin wrapper around the diff produced by a comparison.

    Parameters
    ----------
    diff : object
        The diff to wrap; it is stored unchanged as ``self.diff``.
    """

    def __init__(self, diff):
        self.diff = diff

    @property
    def identical(self):
        """True when the wrapped diff is empty (falsy), False otherwise."""
        if self.diff:
            return False
        return True

    def report(self, **kwargs):
        """Return the wrapped diff pretty-printed; ``kwargs`` are unused."""
        formatted = pprint.pformat(self.diff)
        return formatted


def compare_asdf(result, truth, ignore=None, rtol=1e-05, atol=1e-08, equal_nan=True):
    """
    Diff the trees of two asdf files, using ``truth`` as the reference.

    The comparison is asymmetric: ``truth`` is given to DeepDiff as the
    first tree and ``result`` as the second, so swapping the two arguments
    gives a different report.

    Parameters
    ----------
    result : str
        Filename of result asdf file
    truth : str
        Filename of truth asdf file
    ignore : list
        List of dot-separated tree node paths (e.g. ``"roman.meta.date"``)
        to ignore during the comparison
    rtol : float
        rtol argument passed to `numpy.allclose`
    atol : float
        atol argument passed to `numpy.allclose`
    equal_nan : bool
        Ignore nan inequality

    Returns
    -------
    diff_result : DiffResult
        result of the comparison
    """
    # turn each "a.b.c" into the DeepDiff path "root['a']['b']['c']"
    exclude_paths = [
        "root" + "".join(f"['{key}']" for key in path.split("."))
        for path in (ignore or [])
    ]
    array_operator = NDArrayTypeOperator(
        rtol, atol, equal_nan, types=[asdf.tags.core.NDArrayType, np.ndarray]
    )
    operators = [
        array_operator,
        TimeOperator(types=[astropy.time.Time]),
        WCSOperator(types=[gwcs.WCS]),
    ]
    # Warnings seen in regtest runs indicate that ddtrace logs are
    # evaluated after the files below have been closed, which results in
    # access attempts on a closed asdf file. To try and avoid that, lazy
    # loading and memory mapping are disabled.
    open_options = dict(lazy_load=False, copy_arrays=True)
    with asdf.open(result, **open_options) as result_file:
        with asdf.open(truth, **open_options) as truth_file:
            # truth goes first so that in the output 'old_value' refers to
            # the truth and 'new_value' refers to the result
            diff = deepdiff.DeepDiff(
                truth_file.tree,
                result_file.tree,
                ignore_nan_inequality=equal_nan,
                custom_operators=operators,
                exclude_paths=exclude_paths,
            )
    # The conversion between NDArrayType and ndarray adds a bunch of type
    # changes; drop all of them for now.
    # TODO Ideally we could find a way to remove just the NDArrayType ones
    if "type_changes" in diff:
        del diff["type_changes"]
    return DiffResult(diff)
12 changes: 8 additions & 4 deletions romancal/regtest/test_dark_current.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ def test_dark_current_subtraction_step(rtdata, ignore_asdf_paths):
output = "r0000101001001001001_01101_0001_WFI01_darkcurrent.asdf"
rtdata.output = output
rtdata.get_truth(f"truth/WFI/image/{output}")
assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
assert diff.identical, diff.report()


@pytest.mark.bigdata
Expand All @@ -40,7 +41,8 @@ def test_dark_current_outfile_step(rtdata, ignore_asdf_paths):
output = "Test_darkcurrent.asdf"
rtdata.output = output
rtdata.get_truth(f"truth/WFI/image/{output}")
assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
assert diff.identical, diff.report()


@pytest.mark.bigdata
Expand All @@ -61,7 +63,8 @@ def test_dark_current_outfile_suffix(rtdata, ignore_asdf_paths):
output = "Test_darkcurrent.asdf"
rtdata.output = output
rtdata.get_truth(f"truth/WFI/image/{output}")
assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
assert diff.identical, diff.report()


@pytest.mark.bigdata
Expand All @@ -83,4 +86,5 @@ def test_dark_current_output(rtdata, ignore_asdf_paths):
output = "r0000101001001001001_01101_0001_WFI01_darkcurrent.asdf"
rtdata.output = output
rtdata.get_truth(f"truth/WFI/image/{output}")
assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
assert diff.identical, diff.report()
3 changes: 2 additions & 1 deletion romancal/regtest/test_jump_det.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,5 @@ def test_jump_detection_step(rtdata, ignore_asdf_paths):
output = "r0000101001001001001_01101_0001_WFI01_jump.asdf"
rtdata.output = output
rtdata.get_truth(f"truth/WFI/image/{output}")
assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
assert diff.identical, diff.report()
6 changes: 4 additions & 2 deletions romancal/regtest/test_linearity.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ def test_linearity_step(rtdata, ignore_asdf_paths):
output = "r0000101001001001001_01101_0001_WFI01_linearity.asdf"
rtdata.output = output
rtdata.get_truth(f"truth/WFI/image/{output}")
assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
assert diff.identical, diff.report()


@pytest.mark.bigdata
Expand All @@ -34,4 +35,5 @@ def test_linearity_outfile_step(rtdata, ignore_asdf_paths):
output = "Test_linearity.asdf"
rtdata.output = output
rtdata.get_truth(f"truth/WFI/image/{output}")
assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
assert diff.identical, diff.report()
6 changes: 4 additions & 2 deletions romancal/regtest/test_ramp_fitting.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@ def test_ramp_fitting_step(rtdata, ignore_asdf_paths):
output = "r0000101001001001001_01101_0001_WFI01_rampfit.asdf"
rtdata.output = output
rtdata.get_truth(f"truth/WFI/image/{output}")
assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
assert diff.identical, diff.report()

output = "rampfit_opt_fitopt.asdf"
rtdata.output = output
rtdata.get_truth(f"truth/WFI/image/{output}")
assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
assert diff.identical, diff.report()
3 changes: 2 additions & 1 deletion romancal/regtest/test_refpix.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,5 @@ def test_refpix_step(rtdata, ignore_asdf_paths):
rtdata.output = output
rtdata.get_truth(f"truth/WFI/image/{output}")

assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
assert diff.identical, diff.report()
27 changes: 27 additions & 0 deletions romancal/regtest/test_regtestdata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from roman_datamodels import datamodels as rdm
from roman_datamodels import maker_utils

from romancal.regtest.regtestdata import compare_asdf


def test_compare_asdf_identical(tmp_path):
    """Two files saved from the same model must compare as identical."""
    model = rdm.ImageModel(maker_utils.mk_level2_image(shape=(100, 100)))
    paths = [tmp_path / "test0.asdf", tmp_path / "test1.asdf"]
    for path in paths:
        model.save(path)
    diff = compare_asdf(*paths)
    assert diff.identical, diff.report()


def test_compare_asdf_differ(tmp_path):
    """Changing the data array between saves must show up as a difference."""
    model = rdm.ImageModel(maker_utils.mk_level2_image(shape=(100, 100)))
    original_path = tmp_path / "test0.asdf"
    model.save(original_path)

    # shift every data value by one (in the data's own unit) and save again
    model.data += 1 * model.data.unit
    modified_path = tmp_path / "test1.asdf"
    model.save(modified_path)

    diff = compare_asdf(original_path, modified_path)
    assert not diff.identical, diff.report()
    assert "arrays_differ" in diff.diff
    assert "root['roman']['data']" in diff.diff["arrays_differ"]
6 changes: 3 additions & 3 deletions romancal/regtest/test_tweakreg.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ def test_tweakreg(rtdata, ignore_asdf_paths, tmp_path):
)
assert "v2v3corr" in tweakreg_out.meta.wcs.available_frames

diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
step.log.info(
"DMS280 MSG: Was the proper TweakReg data produced?"
f" : {(compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None)}"
f"DMS280 MSG: Was the proper TweakReg data produced? : {diff.identical}"
)
assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
assert diff.identical, diff.report()
10 changes: 6 additions & 4 deletions romancal/regtest/test_wfi_dq_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,13 @@ def test_dq_init_image_step(rtdata, ignore_asdf_paths):
assert "roman.pixeldq" in ramp_out.to_flat_dict()

rtdata.get_truth(f"truth/WFI/image/{output}")
diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
step.log.info(
"DMS25 MSG: Was the proper data quality array initialized"
" for the ramp data produced? : "
f"{(compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None)}"
f"{diff.identical}"
)
assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
assert diff.identical, diff.report()


@metrics_logger("DMS26")
Expand Down Expand Up @@ -113,9 +114,10 @@ def test_dq_init_grism_step(rtdata, ignore_asdf_paths):
assert "roman.pixeldq" in ramp_out.to_flat_dict()

rtdata.get_truth(f"truth/WFI/grism/{output}")
diff = compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths)
step.log.info(
"DMS26 MSG: Was proper data quality initialized "
"ramp data produced? : "
f"{(compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None)}"
f"{diff.identical}"
)
assert compare_asdf(rtdata.output, rtdata.truth, **ignore_asdf_paths) is None
assert diff.identical, diff.report()
Loading

0 comments on commit b7413cc

Please sign in to comment.