support eventID separator as a command-line option #109

Closed · wants to merge 3 commits
58 changes: 51 additions & 7 deletions cli/simulate_pixels.py
@@ -23,15 +23,15 @@
from tqdm import tqdm

from larndsim import consts
from larndsim.util import CudaDict, batching
from larndsim.util import CudaDict, batching, memory_logger

SEED = int(time())
-#BATCH_SIZE = 4000 # track segments
+BATCH_SIZE = 4000 # track segments
EVENT_BATCH_SIZE = 2 # tpcs
-#WRITE_BATCH_SIZE = 1000 # batches
+WRITE_BATCH_SIZE = 1000 # batches
-EVENT_SEPARATOR = 'spillID' # can be 'eventID' or 'spillID'
+DEFAULT_EVENT_SEPARATOR = 'spillID' # can be 'eventID' or 'spillID'

LOGO = """
_ _ _
@@ -85,6 +85,9 @@ def maybe_create_rng_states(n, seed=0, rng_states=None):

return rng_states




def run_simulation(input_filename,
pixel_layout,
detector_properties,
@@ -94,7 +97,9 @@ def run_simulation(input_filename,
light_det_noise_filename='../larndsim/bin/light_noise-module0.npy',
bad_channels=None,
n_tracks=None,
-pixel_thresholds_file=None):
+pixel_thresholds_file=None,
+event_separator=DEFAULT_EVENT_SEPARATOR,
+save_memory=None):
"""
Command-line interface to run the simulation of a pixelated LArTPC

@@ -116,7 +121,13 @@
(all tracks).
pixel_thresholds_file (str): path to npz file containing pixel thresholds. Defaults
to None.
event_separator (str): name of the track field used to group tracks into events,
'eventID' or 'spillID'. Defaults to DEFAULT_EVENT_SEPARATOR ('spillID').
save_memory (str): if not None, memory usage snapshots are recorded during the
simulation and stored in a file with this name. Defaults to None.
"""
logger = memory_logger(save_memory is None)  # no-op logger unless save_memory is given
logger.start()
logger.take_snapshot()
start_simulation = time()

RangePush("run_simulation")
@@ -190,6 +201,8 @@

RangePop()
end_load = time()
logger.take_snapshot()
logger.archive('loading')
print(f" {end_load-start_load:.2f} s")

response = cp.load(response_file)
@@ -198,6 +211,8 @@
BPG = max(ceil(tracks.shape[0] / TPB),1)

print("******************\nRUNNING SIMULATION\n******************")
logger.start()
logger.take_snapshot()
# Reduce dataset if not all tracks to be simulated
if n_tracks:
tracks = tracks[:n_tracks]
@@ -241,23 +256,36 @@
tracks['t_start'] = np.zeros(tracks.shape[0], dtype=[('t_start', 'f4')])
tracks['t_end'] = np.zeros(tracks.shape[0], dtype=[('t_end', 'f4')])

logger.take_snapshot()
logger.archive('preparation')

# We calculate the number of electrons after recombination (quenching module)
# and the position and number of electrons after drifting (drifting module)
print("Quenching electrons..." , end="")
logger.start()
logger.take_snapshot()
start_quenching = time()
quenching.quench[BPG,TPB](tracks, physics.BIRKS)
end_quenching = time()
logger.take_snapshot()
logger.archive('quenching')
print(f" {end_quenching-start_quenching:.2f} s")

print("Drifting electrons...", end="")
start_drifting = time()
logger.start()
logger.take_snapshot()
drifting.drift[BPG,TPB](tracks)
end_drifting = time()
logger.take_snapshot()
logger.archive('drifting')
print(f" {end_drifting-start_drifting:.2f} s")

if light.LIGHT_SIMULATED:
print("Calculating optical responses...", end="")
start_light_time = time()
logger.start()
logger.take_snapshot()
lut = np.load(light_lut_filename)['arr']

# clip LUT so that no voxel contains 0 visibility
@@ -271,8 +299,12 @@
TPB = 256
BPG = max(ceil(tracks.shape[0] / TPB),1)
lightLUT.calculate_light_incidence[BPG,TPB](tracks, lut, light_sim_dat, track_light_voxel)
logger.take_snapshot()
logger.archive('light')
print(f" {time()-start_light_time:.2f} s")

logger.start()
logger.take_snapshot()
# prep output file with truth datasets
with h5py.File(output_filename, 'a') as output_file:
output_file.create_dataset("tracks", data=tracks)
@@ -284,10 +316,10 @@
output_file.create_dataset("vertices", data=vertices)

# create a lookup table that maps between unique event ids and the segments in the file
-tot_evids = np.unique(tracks[EVENT_SEPARATOR])
-_, _, start_idx = np.intersect1d(tot_evids, tracks[EVENT_SEPARATOR], return_indices=True)
-_, _, rev_idx = np.intersect1d(tot_evids, tracks[EVENT_SEPARATOR][::-1], return_indices=True)
-end_idx = len(tracks[EVENT_SEPARATOR]) - 1 - rev_idx
+tot_evids = np.unique(tracks[event_separator])
+_, _, start_idx = np.intersect1d(tot_evids, tracks[event_separator], return_indices=True)
+_, _, rev_idx = np.intersect1d(tot_evids, tracks[event_separator][::-1], return_indices=True)
+end_idx = len(tracks[event_separator]) - 1 - rev_idx
-track_ids = cp.array(np.arange(len(tracks)), dtype='i4')
+# copy to device
+track_ids = cp.asarray(np.arange(segment_ids.shape[0], dtype=int))
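As an aside, the lookup-table construction above relies on `np.intersect1d(..., return_indices=True)` returning the index of the first occurrence of each common value; applying it to the reversed array and mirroring the index therefore yields the last occurrence. A standalone toy sketch (the data is illustrative, and it assumes each event's tracks are contiguous in the file):

```python
import numpy as np

event_ids = np.array([7, 7, 7, 3, 3, 9])   # stand-in for tracks[event_separator]
tot_evids = np.unique(event_ids)            # [3, 7, 9]
_, _, start_idx = np.intersect1d(tot_evids, event_ids, return_indices=True)
_, _, rev_idx = np.intersect1d(tot_evids, event_ids[::-1], return_indices=True)
end_idx = len(event_ids) - 1 - rev_idx

print(start_idx)  # [3 0 5] -> index of the first track of events 3, 7, 9
print(end_idx)    # [4 2 5] -> index of the last track of events 3, 7, 9
```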
@@ -369,9 +401,14 @@ def save_results(event_times, is_first_event, results):

return event_times[-1]

logger.take_snapshot()
logger.archive('preparation2')

# pre-allocate some random number states
rng_states = maybe_create_rng_states(1024*256, seed=0)
last_time = 0
logger.start()
logger.take_snapshot([0])
for batch_mask in tqdm(batching.TPCBatcher(tracks, tpc_batch_size=EVENT_BATCH_SIZE, tpc_borders=detector.TPC_BORDERS),
desc='Simulating batches...', ncols=80, smoothing=0):
# grab only tracks from current batch
@@ -603,10 +640,14 @@ def save_results(event_times, is_first_event, results):
last_time = save_results(event_times, is_first_event=last_time==0, results=results_acc)
results_acc = defaultdict(list)

logger.take_snapshot([len(logger.log)])

# Always save results after last iteration
if len(results_acc['event_id']) >0 and len(np.concatenate(results_acc['event_id'], axis=0)) > 0:
save_results(event_times, is_first_event=last_time==0, results=results_acc)

logger.take_snapshot([len(logger.log)])

with h5py.File(output_filename, 'a') as output_file:
if 'configs' in output_file.keys():
output_file['configs'].attrs['pixel_layout'] = pixel_layout
@@ -615,7 +656,10 @@

RangePop()
end_simulation = time()
logger.take_snapshot([len(logger.log)])
print(f"Elapsed time: {end_simulation-start_simulation:.2f} s")
logger.archive('loop',['loop'])
logger.store(save_memory)

if __name__ == "__main__":
fire.Fire(run_simulation)
1 change: 1 addition & 0 deletions larndsim/util/__init__.py
@@ -1,2 +1,3 @@
from .cuda_dict import *
from .batching import *
from .memory_logger import *
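Note: the wildcard import binds the public class name `memory_logger` in the `larndsim.util` namespace, shadowing the submodule of the same name; this is what lets `simulate_pixels.py` do `from larndsim.util import CudaDict, batching, memory_logger` and receive the class directly.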
148 changes: 148 additions & 0 deletions larndsim/util/memory_logger.py
@@ -0,0 +1,148 @@
import pynvml, tracemalloc, time
import numpy as np

class memory_logger():
    '''
    A class to log memory usage over time.
    Usage:
    - Call "start" to begin.

    - Call "take_snapshot" to record the current time stamp plus cpu and gpu memory info.
      A caller can provide additional data in the form of a list of values to be stored
      alongside. The snapshots accumulate in the log attribute.

    - Call "archive" to file away the snapshots taken so far under a string key; after this
      call, the log attribute is reset to an empty list. The key can be used to retrieve
      the archived data at any time.

    - Call "store" to write the archived data to an HDF5 file (PyTables format), or to an
      npz file if pandas/HDF5 is unavailable.

    - If constructed with "do_nothing=True", it does none of the above :)

    Attributes:
    -----------
    log  - intermediate storage holding the list of snapshots (each an array of time and memory usage values)
    data - the archive, mapping a string key to a completed log

    Functions:
    -----------
    reset_log()
        - reset the internal state and remove intermediate data stored locally.

    start()
        - start the clock, ready to take a snapshot of memory usage

    take_snapshot(more_data)
        - record the time and cpu/gpu memory info in the local intermediate storage.
        ARGS:
            more_data - an optional list of floating point values to be stored in addition.

    archive(data_name, field_names)
        - store the intermediate data with a key (data_name)
        ARGS:
            data_name
                - the key string to be associated with the subject data.
            field_names
                - a list of strings naming the extra values passed to take_snapshot calls

    store(filename)
        - store the archived data into an HDF5 file using PyTables, or a numpy file using numpy.savez
        ARGS:
            filename
                - string value to name the data file
    '''
    lock = None

    def __init__(self, do_nothing=False):
        if self.__class__.lock:
            raise RuntimeError(f'Only one instance is allowed for {self.__class__.__name__}')
        self._do_nothing = bool(do_nothing)
        if self._do_nothing:
            return
        pynvml.nvmlInit()
        tracemalloc.start()
        self.log = []
        self.data = dict()
        self._h = pynvml.nvmlDeviceGetHandleByIndex(0)
        self._t0 = None
        self.__class__.lock = True

    def __del__(self):
        # a no-op instance never initialized NVML, so there is nothing to release
        if getattr(self, '_do_nothing', True):
            return
        self.__class__.lock = False
        pynvml.nvmlShutdown()

    def reset_log(self):
        if self._do_nothing: return
        self._t0 = None
        self.log = []

    def start(self):
        if self._do_nothing: return
        self.reset_log()
        self._t0 = time.time()

    def store(self, filename='memory_logger.h5'):
        if self._do_nothing: return

        try:
            import pandas as pd
            for group_name in [n for n in self.data.keys() if not n.endswith('_fields')]:
                data_dict = dict()
                for i, field_name in enumerate(self.data[group_name + '_fields']):
                    data_dict[field_name] = self.data[group_name][:, i]
                df = pd.DataFrame(data_dict)
                df.to_hdf(filename, key=group_name, format='table', mode='a')
        except ImportError:
            np.savez(filename, **self.data)
        self.data = dict()
        self.reset_log()

    def take_snapshot(self, more_data=[]):
        if self._do_nothing: return
        mem_info = self.catch_memory()
        if len(more_data):
            mem_info = [*mem_info, *more_data]

        # ensure the data size is consistent
        if len(self.log) == 0 or len(self.log[-1]) == len(mem_info):
            self.log.append(mem_info)
        else:
            raise RuntimeError(f'snapshot size is inconsistent (previous {len(self.log[-1])}, current {len(mem_info)})')

    def archive(self, data_name, field_names=[]):
        if self._do_nothing: return
        if len(self.log) == 0:
            print('No data logged. Nothing to archive...')
            return False
        if data_name in self.data:
            raise KeyError(f'Data name {data_name} already exists')

        fields = ['time', 'cpu_mem_used', 'cpu_mem_peak', 'gpu_mem_used', 'gpu_mem_free']
        default_size = len(fields)
        fields = [*fields, *field_names]
        # check the field length
        if len(fields) > len(self.log[-1]):
            raise RuntimeError('More field names given than data columns in the snapshots')

        # pad any remaining unnamed columns with generic names
        if len(fields) < len(self.log[-1]):
            for i in range(len(fields), len(self.log[-1])):
                fields.append(f'data{i-default_size}')

        self.data[data_name] = np.array(self.log, dtype=np.float64)
        self.data[data_name + '_fields'] = fields
        self.reset_log()
        return True

    def catch_memory(self):
        if self._do_nothing: return
        if self._t0 is None:
            raise RuntimeError('Must call start function before catch_memory!')

        t = time.time() - self._t0
        gpu_info = pynvml.nvmlDeviceGetMemoryInfo(self._h)
        #mem = psutil.virtual_memory()
        cpu_used, cpu_peak = tracemalloc.get_traced_memory()

        return [t, cpu_used, cpu_peak, gpu_info.used, gpu_info.free]
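A minimal usage sketch of the new logger (the profiled workload and file names below are illustrative). Note the constraints visible in the class: only one live instance is allowed at a time (the class-level `lock`), extra per-snapshot values must have the same count on every snapshot within one log, and an NVIDIA GPU must be visible to NVML:

```python
import numpy as np
from larndsim.util import memory_logger

logger = memory_logger(do_nothing=False)   # pass True to turn every call into a no-op
logger.start()                             # reset the log and start the clock
logger.take_snapshot([0.])                 # [time, cpu_used, cpu_peak, gpu_used, gpu_free, extra]
x = np.ones((1000, 1000)).sum()            # stand-in for the code being profiled
logger.take_snapshot([float(x)])           # extra values: same count as the first snapshot
logger.archive('my_stage', ['my_metric'])  # freeze the snapshots under a key
logger.store('memlog.h5')                  # HDF5 via pandas/PyTables if available, else npz

# With pandas installed, the stored table can be read back as:
#   df = pandas.read_hdf('memlog.h5', 'my_stage')
```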
2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@

VER = "0.3.1"

-reqs = ["numpy", "pytest", "numba>=0.52", "larpix-control", "larpix-geometry", "tqdm", "fire"]
+reqs = ["numpy", "pytest", "numba>=0.52", "larpix-control", "larpix-geometry", "tqdm", "fire", "nvidia-ml-py"]

try:
import cupy
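The new `nvidia-ml-py` requirement provides the `pynvml` module imported by `memory_logger.py`. A quick sanity check that the dependency is usable (it requires an NVIDIA driver on the machine):

```python
import pynvml

pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)  # first visible GPU
info = pynvml.nvmlDeviceGetMemoryInfo(handle)  # the same call memory_logger uses
print(f"GPU memory used/free/total: {info.used}/{info.free}/{info.total} bytes")
pynvml.nvmlShutdown()
```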