support eventID separator as a command-line option #109

Closed · wants to merge 3 commits
58 changes: 51 additions & 7 deletions cli/simulate_pixels.py
@@ -23,15 +23,15 @@
from tqdm import tqdm

from larndsim import consts
from larndsim.util import CudaDict, batching
from larndsim.util import CudaDict, batching, memory_logger

SEED = int(time())
-#BATCH_SIZE = 4000 # track segments
+BATCH_SIZE = 4000 # track segments
EVENT_BATCH_SIZE = 2 # tpcs
-#WRITE_BATCH_SIZE = 1000 # batches
+WRITE_BATCH_SIZE = 1000 # batches
-EVENT_SEPARATOR = 'spillID' # can be 'eventID' or 'spillID'
+DEFAULT_EVENT_SEPARATOR = 'spillID' # can be 'eventID' or 'spillID'

LOGO = """
_ _ _
@@ -85,6 +85,9 @@ def maybe_create_rng_states(n, seed=0, rng_states=None):

return rng_states




def run_simulation(input_filename,
pixel_layout,
detector_properties,
@@ -94,7 +97,9 @@ def run_simulation(input_filename,
light_det_noise_filename='../larndsim/bin/light_noise-module0.npy',
bad_channels=None,
n_tracks=None,
-pixel_thresholds_file=None):
+pixel_thresholds_file=None,
+event_separator=DEFAULT_EVENT_SEPARATOR,
+save_memory=None):
"""
Command-line interface to run the simulation of a pixelated LArTPC

@@ -116,7 +121,13 @@
(all tracks).
pixel_thresholds_file (str): path to npz file containing pixel thresholds. Defaults
to None.
event_separator (str): name of the track field used to group tracks into events,
'eventID' or 'spillID'. Defaults to DEFAULT_EVENT_SEPARATOR ('spillID').
save_memory (str): if not None, memory usage snapshots are recorded during the
simulation and stored in a file with this name. Defaults to None.
"""
logger = memory_logger(save_memory is None)  # no-op logger unless save_memory is given
logger.start()
logger.take_snapshot()
start_simulation = time()

RangePush("run_simulation")
@@ -190,6 +201,8 @@

RangePop()
end_load = time()
logger.take_snapshot()
logger.archive('loading')
print(f" {end_load-start_load:.2f} s")

response = cp.load(response_file)
@@ -198,6 +211,8 @@
BPG = max(ceil(tracks.shape[0] / TPB),1)

print("******************\nRUNNING SIMULATION\n******************")
logger.start()
logger.take_snapshot()
# Reduce dataset if not all tracks to be simulated
if n_tracks:
tracks = tracks[:n_tracks]
@@ -241,23 +256,36 @@
tracks['t_start'] = np.zeros(tracks.shape[0], dtype=[('t_start', 'f4')])
tracks['t_end'] = np.zeros(tracks.shape[0], dtype=[('t_end', 'f4')])

logger.take_snapshot()
logger.archive('preparation')

# We calculate the number of electrons after recombination (quenching module)
# and the position and number of electrons after drifting (drifting module)
print("Quenching electrons..." , end="")
logger.start()
logger.take_snapshot()
start_quenching = time()
quenching.quench[BPG,TPB](tracks, physics.BIRKS)
end_quenching = time()
logger.take_snapshot()
logger.archive('quenching')
print(f" {end_quenching-start_quenching:.2f} s")

print("Drifting electrons...", end="")
start_drifting = time()
logger.start()
logger.take_snapshot()
drifting.drift[BPG,TPB](tracks)
end_drifting = time()
logger.take_snapshot()
logger.archive('drifting')
print(f" {end_drifting-start_drifting:.2f} s")

if light.LIGHT_SIMULATED:
print("Calculating optical responses...", end="")
start_light_time = time()
logger.start()
logger.take_snapshot()
lut = np.load(light_lut_filename)['arr']

# clip LUT so that no voxel contains 0 visibility
@@ -271,8 +299,12 @@
TPB = 256
BPG = max(ceil(tracks.shape[0] / TPB),1)
lightLUT.calculate_light_incidence[BPG,TPB](tracks, lut, light_sim_dat, track_light_voxel)
logger.take_snapshot()
logger.archive('light')
print(f" {time()-start_light_time:.2f} s")

logger.start()
logger.take_snapshot()
# prep output file with truth datasets
with h5py.File(output_filename, 'a') as output_file:
output_file.create_dataset("tracks", data=tracks)
@@ -284,10 +316,10 @@
output_file.create_dataset("vertices", data=vertices)

# create a lookup table that maps between unique event ids and the segments in the file
-tot_evids = np.unique(tracks[EVENT_SEPARATOR])
-_, _, start_idx = np.intersect1d(tot_evids, tracks[EVENT_SEPARATOR], return_indices=True)
-_, _, rev_idx = np.intersect1d(tot_evids, tracks[EVENT_SEPARATOR][::-1], return_indices=True)
-end_idx = len(tracks[EVENT_SEPARATOR]) - 1 - rev_idx
+tot_evids = np.unique(tracks[event_separator])
+_, _, start_idx = np.intersect1d(tot_evids, tracks[event_separator], return_indices=True)
+_, _, rev_idx = np.intersect1d(tot_evids, tracks[event_separator][::-1], return_indices=True)
+end_idx = len(tracks[event_separator]) - 1 - rev_idx
-track_ids = cp.array(np.arange(len(tracks)), dtype='i4')
+# copy to device
+track_ids = cp.asarray(np.arange(segment_ids.shape[0], dtype=int))
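As an aside, the lookup-table construction above relies on `np.intersect1d(..., return_indices=True)` returning the index of the first occurrence of each common value; applying it to the reversed array and mirroring the index therefore yields the last occurrence. A standalone toy sketch (the data is illustrative, and it assumes each event's tracks are contiguous in the file):

```python
import numpy as np

event_ids = np.array([7, 7, 7, 3, 3, 9])   # stand-in for tracks[event_separator]
tot_evids = np.unique(event_ids)            # [3, 7, 9]
_, _, start_idx = np.intersect1d(tot_evids, event_ids, return_indices=True)
_, _, rev_idx = np.intersect1d(tot_evids, event_ids[::-1], return_indices=True)
end_idx = len(event_ids) - 1 - rev_idx

print(start_idx)  # [3 0 5] -> index of the first track of events 3, 7, 9
print(end_idx)    # [4 2 5] -> index of the last track of events 3, 7, 9
```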
@@ -369,9 +401,14 @@ def save_results(event_times, is_first_event, results):

return event_times[-1]

logger.take_snapshot()
logger.archive('preparation2')

# pre-allocate some random number states
rng_states = maybe_create_rng_states(1024*256, seed=0)
last_time = 0
logger.start()
logger.take_snapshot([0])
for batch_mask in tqdm(batching.TPCBatcher(tracks, tpc_batch_size=EVENT_BATCH_SIZE, tpc_borders=detector.TPC_BORDERS),
desc='Simulating batches...', ncols=80, smoothing=0):
# grab only tracks from current batch
@@ -603,10 +640,14 @@ def save_results(event_times, is_first_event, results):
last_time = save_results(event_times, is_first_event=last_time==0, results=results_acc)
results_acc = defaultdict(list)

logger.take_snapshot([len(logger.log)])

# Always save results after last iteration
if len(results_acc['event_id']) >0 and len(np.concatenate(results_acc['event_id'], axis=0)) > 0:
save_results(event_times, is_first_event=last_time==0, results=results_acc)

logger.take_snapshot([len(logger.log)])

with h5py.File(output_filename, 'a') as output_file:
if 'configs' in output_file.keys():
output_file['configs'].attrs['pixel_layout'] = pixel_layout
@@ -615,7 +656,10 @@

RangePop()
end_simulation = time()
logger.take_snapshot([len(logger.log)])
print(f"Elapsed time: {end_simulation-start_simulation:.2f} s")
logger.archive('loop',['loop'])
logger.store(save_memory)

if __name__ == "__main__":
fire.Fire(run_simulation)
1 change: 1 addition & 0 deletions larndsim/util/__init__.py
@@ -1,2 +1,3 @@
from .cuda_dict import *
from .batching import *
from .memory_logger import *
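Note: the wildcard import binds the public class name `memory_logger` in the `larndsim.util` namespace, shadowing the submodule of the same name; this is what lets `simulate_pixels.py` do `from larndsim.util import CudaDict, batching, memory_logger` and receive the class directly.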
148 changes: 148 additions & 0 deletions larndsim/util/memory_logger.py
@@ -0,0 +1,148 @@
import pynvml, tracemalloc, time
import numpy as np

class memory_logger():
    '''
    A class to log memory usage over time.
    Usage:
    - Call "start" to begin.

    - Call "take_snapshot" to record the current time stamp plus cpu and gpu memory info.
      A caller can provide additional data in the form of a list of values to be stored
      alongside. The snapshots accumulate in the log attribute.

    - Call "archive" to file away the snapshots taken so far under a string key; after this
      call, the log attribute is reset to an empty list. The key can be used to retrieve
      the archived data at any time.

    - Call "store" to write the archived data to an HDF5 file (PyTables format), or to an
      npz file if pandas/HDF5 is unavailable.

    - If constructed with "do_nothing=True", it does none of the above :)

    Attributes:
    -----------
    log  - intermediate storage holding the list of snapshots (each an array of time and memory usage values)
    data - the archive, mapping a string key to a completed log

    Functions:
    -----------
    reset_log()
        - reset the internal state and remove intermediate data stored locally.

    start()
        - start the clock, ready to take a snapshot of memory usage

    take_snapshot(more_data)
        - record the time and cpu/gpu memory info in the local intermediate storage.
        ARGS:
            more_data - an optional list of floating point values to be stored in addition.

    archive(data_name, field_names)
        - store the intermediate data with a key (data_name)
        ARGS:
            data_name
                - the key string to be associated with the subject data.
            field_names
                - a list of strings naming the extra values passed to take_snapshot calls

    store(filename)
        - store the archived data into an HDF5 file using PyTables, or a numpy file using numpy.savez
        ARGS:
            filename
                - string value to name the data file
    '''
    lock = None

    def __init__(self, do_nothing=False):
        if self.__class__.lock:
            raise RuntimeError(f'Only one instance is allowed for {self.__class__.__name__}')
        self._do_nothing = bool(do_nothing)
        if self._do_nothing:
            return
        pynvml.nvmlInit()
        tracemalloc.start()
        self.log = []
        self.data = dict()
        self._h = pynvml.nvmlDeviceGetHandleByIndex(0)
        self._t0 = None
        self.__class__.lock = True

    def __del__(self):
        # a no-op instance never initialized NVML, so there is nothing to release
        if getattr(self, '_do_nothing', True):
            return
        self.__class__.lock = False
        pynvml.nvmlShutdown()

    def reset_log(self):
        if self._do_nothing: return
        self._t0 = None
        self.log = []

    def start(self):
        if self._do_nothing: return
        self.reset_log()
        self._t0 = time.time()

    def store(self, filename='memory_logger.h5'):
        if self._do_nothing: return

        try:
            import pandas as pd
            for group_name in [n for n in self.data.keys() if not n.endswith('_fields')]:
                data_dict = dict()
                for i, field_name in enumerate(self.data[group_name + '_fields']):
                    data_dict[field_name] = self.data[group_name][:, i]
                df = pd.DataFrame(data_dict)
                df.to_hdf(filename, key=group_name, format='table', mode='a')
        except ImportError:
            np.savez(filename, **self.data)
        self.data = dict()
        self.reset_log()

    def take_snapshot(self, more_data=[]):
        if self._do_nothing: return
        mem_info = self.catch_memory()
        if len(more_data):
            mem_info = [*mem_info, *more_data]

        # ensure the data size is consistent
        if len(self.log) == 0 or len(self.log[-1]) == len(mem_info):
            self.log.append(mem_info)
        else:
            raise RuntimeError(f'snapshot size is inconsistent (previous {len(self.log[-1])}, current {len(mem_info)})')

    def archive(self, data_name, field_names=[]):
        if self._do_nothing: return
        if len(self.log) == 0:
            print('No data logged. Nothing to archive...')
            return False
        if data_name in self.data:
            raise KeyError(f'Data name {data_name} already exists')

        fields = ['time', 'cpu_mem_used', 'cpu_mem_peak', 'gpu_mem_used', 'gpu_mem_free']
        default_size = len(fields)
        fields = [*fields, *field_names]
        # check the field length
        if len(fields) > len(self.log[-1]):
            raise RuntimeError('More field names given than data columns in the snapshots')

        # pad any remaining unnamed columns with generic names
        if len(fields) < len(self.log[-1]):
            for i in range(len(fields), len(self.log[-1])):
                fields.append(f'data{i-default_size}')

        self.data[data_name] = np.array(self.log, dtype=np.float64)
        self.data[data_name + '_fields'] = fields
        self.reset_log()
        return True

    def catch_memory(self):
        if self._do_nothing: return
        if self._t0 is None:
            raise RuntimeError('Must call start function before catch_memory!')

        t = time.time() - self._t0
        gpu_info = pynvml.nvmlDeviceGetMemoryInfo(self._h)
        #mem = psutil.virtual_memory()
        cpu_used, cpu_peak = tracemalloc.get_traced_memory()

        return [t, cpu_used, cpu_peak, gpu_info.used, gpu_info.free]
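A minimal usage sketch of the new logger (the profiled workload and file names below are illustrative). Note the constraints visible in the class: only one live instance is allowed at a time (the class-level `lock`), extra per-snapshot values must have the same count on every snapshot within one log, and an NVIDIA GPU must be visible to NVML:

```python
import numpy as np
from larndsim.util import memory_logger

logger = memory_logger(do_nothing=False)   # pass True to turn every call into a no-op
logger.start()                             # reset the log and start the clock
logger.take_snapshot([0.])                 # [time, cpu_used, cpu_peak, gpu_used, gpu_free, extra]
x = np.ones((1000, 1000)).sum()            # stand-in for the code being profiled
logger.take_snapshot([float(x)])           # extra values: same count as the first snapshot
logger.archive('my_stage', ['my_metric'])  # freeze the snapshots under a key
logger.store('memlog.h5')                  # HDF5 via pandas/PyTables if available, else npz

# With pandas installed, the stored table can be read back as:
#   df = pandas.read_hdf('memlog.h5', 'my_stage')
```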
2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@

VER = "0.3.1"

-reqs = ["numpy", "pytest", "numba>=0.52", "larpix-control", "larpix-geometry", "tqdm", "fire"]
+reqs = ["numpy", "pytest", "numba>=0.52", "larpix-control", "larpix-geometry", "tqdm", "fire", "nvidia-ml-py"]

try:
import cupy
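The new `nvidia-ml-py` requirement provides the `pynvml` module imported by `memory_logger.py`. A quick sanity check that the dependency is usable (it requires an NVIDIA driver on the machine):

```python
import pynvml

pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)  # first visible GPU
info = pynvml.nvmlDeviceGetMemoryInfo(handle)  # the same call memory_logger uses
print(f"GPU memory used/free/total: {info.used}/{info.free}/{info.total} bytes")
pynvml.nvmlShutdown()
```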