Skip to content

Commit

Permalink
910 Handle hitless events post Sophronia
Browse files Browse the repository at this point in the history
#910

[author: gonzaponte]

As discussed in #897, we implement a solution to hitless events in
Esmeralda and Beersheba. Hitless events are preserved in the
`/Run/events` table but are (naturally) missing in `/RECO/Events`.

Closes #897.

[reviewer: jwaiton]

Introduces a solution for hitless events post-Sophronia that stops
Esmeralda and Beersheba from crashing, and provides a warning to the
user if they occur. Good work!
  • Loading branch information
jwaiton authored and carhc committed Oct 30, 2024
2 parents ec63618 + 81be9a7 commit 237ab2e
Show file tree
Hide file tree
Showing 7 changed files with 92 additions and 4 deletions.
5 changes: 3 additions & 2 deletions invisible_cities/cities/beersheba.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,10 @@


# Temporary. The removal of the event model will fix this.
from collections import defaultdict
def hitc_to_df_(hitc):
columns = defaultdict(list)
columns = "event time npeak Xpeak Ypeak nsipm X Y Xrms Yrms Z Q E Qc Ec track_id Ep".split()
columns = {col:[] for col in columns}

for hit in hitc.hits:
columns["event" ].append(hitc.event)
columns["time" ].append(hitc.time)
Expand Down
14 changes: 14 additions & 0 deletions invisible_cities/cities/beersheba_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os

import numpy as np
import tables as tb
import pandas as pd
Expand Down Expand Up @@ -185,3 +186,16 @@ def test_beersheba_filters_empty_dfs(beersheba_config, config_tmpdir):

df = dio.load_dst(path_out, "Filters", "nohits")
assert df.passed.tolist() == [False]


@mark.filterwarnings("ignore:Event .* does not contain hits")
@mark.filterwarnings("ignore:dataframe contains strings longer than allowed")
@mark.filterwarnings("ignore:Input file does not contain /config group")
def test_beersheba_does_not_crash_with_no_hits(beersheba_config, Th228_hits_missing, config_tmpdir):
    """
    Smoke test: run Beersheba on the `Th228_hits_missing` input,
    restricted to a single event, and check that it finishes
    without raising.
    """
    output_name = "beersheba_does_not_crash_with_no_hits.h5"
    overrides = {
        "files_in": Th228_hits_missing,
        "file_out": os.path.join(config_tmpdir, output_name),
        "event_range": 1,
    }
    beersheba_config.update(overrides)

    # No assertions on the output: the test passes as long as the
    # city runs to completion.
    beersheba(**beersheba_config)
9 changes: 7 additions & 2 deletions invisible_cities/cities/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,12 +594,17 @@ def hits_and_kdst_from_files( paths : List[str]
except (tb.exceptions.NoSuchNodeError, IndexError):
continue

check_lengths(event_info, hits_df.event.unique())
check_lengths(event_info, kdst_df.event.unique())

for evtinfo in event_info:
event_number, timestamp = evtinfo.fetch_all_fields()
hits = hits_from_df(hits_df.loc[hits_df.event == event_number])
yield dict(hits = hits[event_number],
if len(hits):
hits = hits[event_number]
else:
warnings.warn(f"Event {event_number} does not contain hits", UserWarning)
hits = HitCollection(event_number, timestamp, [])
yield dict(hits = hits,
kdst = kdst_df.loc[kdst_df.event==event_number],
run_number = run_number,
event_number = event_number,
Expand Down
9 changes: 9 additions & 0 deletions invisible_cities/cities/components_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,15 @@ def test_hits_and_kdst_from_files(ICDATADIR):
assert type(output['kdst']) == pd.DataFrame


@mark.filterwarnings("ignore:Event .* does not contain hits")
def test_hits_and_kdst_from_files_missing_hits(Th228_hits_missing, config_tmpdir):
    """
    The source must yield exactly one item per row of `/Run/events`
    when reading the `Th228_hits_missing` input.
    """
    expected_count = len(pd.read_hdf(Th228_hits_missing, "/Run/events"))

    source = hits_and_kdst_from_files([Th228_hits_missing], "RECO", "Events")

    # Exhaust the generator, counting the items it produces.
    produced_count = 0
    for _ in source:
        produced_count += 1

    assert produced_count == expected_count


def test_collect():
the_source = list(range(0,10))
the_collector = collect()
Expand Down
15 changes: 15 additions & 0 deletions invisible_cities/cities/esmeralda_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import os
import shutil
import numpy as np
import tables as tb

from pytest import mark

from .. core import system_of_units as units
from .. io import dst_io as dio
from . esmeralda import esmeralda
Expand Down Expand Up @@ -199,3 +202,15 @@ def test_esmeralda_filters_events_with_too_many_hits(esmeralda_config, Th228_tra
assert (summary.evt_ntrks > 0 ).tolist() == evt_pass
assert (summary.evt_nhits < nhits_max).tolist() == evt_pass
assert filter_output.passed.tolist() == evt_pass


@mark.filterwarnings("ignore:Event .* does not contain hits")
@mark.filterwarnings("ignore:Input file does not contain /config group")
def test_esmeralda_does_not_crash_with_no_hits(esmeralda_config, Th228_hits_missing, config_tmpdir):
    """
    Smoke test: run Esmeralda on the `Th228_hits_missing` input,
    restricted to a single event, and check that it finishes
    without raising.
    """
    output_name = "esmeralda_does_not_crash_with_no_hits.h5"
    overrides = {
        "files_in": Th228_hits_missing,
        "file_out": os.path.join(config_tmpdir, output_name),
        "event_range": 1,
    }
    esmeralda_config.update(overrides)

    # No assertions on the output: the test passes as long as the
    # city runs to completion.
    esmeralda(**esmeralda_config)
27 changes: 27 additions & 0 deletions invisible_cities/cities/sophronia_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from .. core.system_of_units import pes
from . sophronia import sophronia


def test_sophronia_runs(sophronia_config, config_tmpdir):
path_out = os.path.join(config_tmpdir, 'sophronia_runs.h5')
nevt_req = 1
Expand Down Expand Up @@ -97,6 +98,9 @@ def test_sophronia_filters_events_with_only_nn_hits(config_tmpdir, sophronia_con

sophronia(**config)

with tb.open_file(config["files_in"]) as input_file:
event_number = input_file.root.Run.events[0][0]

with tb.open_file(path_out) as output_file:
# Check that the event passes the s12_selector, which is
# applied earlier. Then check it doesn't pass the valid_hit
Expand All @@ -105,3 +109,26 @@ def test_sophronia_filters_events_with_only_nn_hits(config_tmpdir, sophronia_con
# (event_number, passed_flag)
assert output_file.root.Filters.s12_selector[0][1]
assert not output_file.root.Filters.valid_hit [0][1]


def test_sophronia_keeps_hitless_events(config_tmpdir, sophronia_config):
    """
    Run Sophronia on a single event with a very high charge threshold
    (`q_thr = 1e4 pes`), so that all hits are discarded (turned into NN).
    The event must still be listed in the /Run/events output table,
    but must not appear in the /RECO/Events output table.
    """
    path_out = os.path.join(config_tmpdir, 'test_sophronia_keeps_hitless_events.h5')
    config = {
        **sophronia_config,
        "file_out": path_out,
        "q_thr": 1e4 * pes,
        "event_range": 1,
    }

    sophronia(**config)

    # The first entry of the input's /Run/events is the event that was processed.
    with tb.open_file(config["files_in"]) as input_file:
        event_number = input_file.root.Run.events[0][0]

    with tb.open_file(path_out) as output_file:
        run_events = output_file.root.Run.events
        assert len(run_events) == 1
        assert event_number == run_events[0][0]

        reco_event_numbers = output_file.root.RECO.Events.col("event")
        assert event_number not in reco_event_numbers
17 changes: 17 additions & 0 deletions invisible_cities/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import os
import pytest
import shutil

import numpy as np
import tables as tb

from pandas import DataFrame
from collections import namedtuple
Expand Down Expand Up @@ -637,6 +640,20 @@ def Th228_deco_separate(ICDATADIR):
return filename


@pytest.fixture(scope="session")
def Th228_hits_missing(Th228_hits, config_tmpdir):
    """
    Copy the `Th228_hits` input file into `config_tmpdir` (with a
    `_missing_hits.h5` suffix) and remove the hits of the first event
    from the copy's /RECO/Events table. Returns the path to the copy.
    """
    copy_name = os.path.basename(Th228_hits).replace(".h5", "_missing_hits.h5")
    outpath = os.path.join(config_tmpdir, copy_name)
    shutil.copy(Th228_hits, outpath)

    with tb.open_file(outpath, "r+") as h5file:
        first_evt = h5file.root.Run.events[0][0]
        hits_table = h5file.root.RECO.Events

        # Count the rows whose first field matches the first event number.
        n_delete = sum(row[0] == first_evt for row in hits_table)

        # NOTE(review): this drops the first `n_delete` rows, which assumes
        # that the first event's hits sit at the start of the table and that
        # the event number is the first column -- confirm against the input.
        hits_table.remove_rows(0, n_delete)

    return outpath


@pytest.fixture(scope="session")
def next100_mc_krmap(ICDATADIR):
filename = "map_NEXT100_MC.h5"
Expand Down

0 comments on commit 237ab2e

Please sign in to comment.