
Commit

continue working on CI
BogGyver committed Apr 22, 2020
1 parent 425cdc0 commit 9da8279
Showing 3 changed files with 81 additions and 49 deletions.
selfdrive/car/tesla/readconfig.py (3 changes: 2 additions & 1 deletion)
@@ -1,8 +1,9 @@
 import configparser
 from common.params import Params
 import subprocess
+from common.basedir import BASEDIR
 
-default_config_file_path = '/data/bb_openpilot.cfg'
+default_config_file_path = '%s/../bb_openpilot.cfg' % BASEDIR
 
 class ConfigFile():
   config_file_r = 'r'
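With the config path now derived from BASEDIR rather than hard-coded, a stock install still ends up with the same file: readconfig.sh falls back to BASEDIR=/data/openpilot, and /data/openpilot/../bb_openpilot.cfg is /data/bb_openpilot.cfg, while a checkout somewhere else (for example on a CI machine) keeps its config next to that checkout. A minimal sketch of the resolution; the helper and the example paths are illustrative, not part of the commit:

import os

def resolved_config_path(basedir):
  # Mirrors "'%s/../bb_openpilot.cfg' % BASEDIR" from readconfig.py (sketch only).
  return os.path.normpath(os.path.join(basedir, '..', 'bb_openpilot.cfg'))

assert resolved_config_path('/data/openpilot') == '/data/bb_openpilot.cfg'      # stock install
assert resolved_config_path('/tmp/ci/openpilot') == '/tmp/ci/bb_openpilot.cfg'  # hypothetical CI checkout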
selfdrive/car/tesla/readconfig.sh (5 changes: 4 additions & 1 deletion)
@@ -1,4 +1,7 @@
-CFG_FILE=/data/bb_openpilot.cfg
+if [ -z "$BASEDIR" ]; then
+  BASEDIR="/data/openpilot"
+fi
+CFG_FILE="$BASEDIR/../bb_openpilot.cfg"
 CFG_CONTENT=$(cat $CFG_FILE | sed -r "s/'/SINGLE_Q/" | sed -r '/[^=]+=[^=]+/!d' | sed -r 's/\s+=\s/=/g' | sed -e 's/[[:space:]]*\=[[:space:]]*/=/g' \
     -e 's/#.*$//' \
     -e 's/[[:space:]]*$//' \
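The sed pipeline above (cut off in this view) whitelists key=value lines and normalizes them before the script reads the config. A rough Python rendering of the visible steps; the function is illustrative only and not part of readconfig.sh:

import re

def normalize_cfg_line(line):
  line = line.replace("'", "SINGLE_Q", 1)   # sed "s/'/SINGLE_Q/"
  if not re.search(r"[^=]+=[^=]+", line):   # sed '/[^=]+=[^=]+/!d': drop lines that are not key=value
    return None
  line = re.sub(r"\s*=\s*", "=", line)      # collapse whitespace around '='
  line = re.sub(r"#.*$", "", line)          # strip trailing comments
  return line.rstrip()                      # strip trailing whitespace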
selfdrive/modeld/runners/tensorrt_runner.py (122 changes: 75 additions & 47 deletions)
@@ -11,27 +11,40 @@
 from pathlib import Path
 import tensorflow as tf
 import tensorrt as trt
+import argparse
+from onnx import ModelProto
 
 HostDeviceMemory = namedtuple('HostDeviceMemory', 'host_memory device_memory')
 
-def allocate_buffers(cls, engine):
-  inputs = []
-  outputs = []
-  bindings = []
-  stream = cuda.Stream()
-  for binding in engine:
-    size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
-    dtype = trt.nptype(engine.get_binding_dtype(binding))
-    # Allocate host and device buffers
-    host_memory = cuda.pagelocked_empty(size, dtype)
-    device_memory = cuda.mem_alloc(host_memory.nbytes)
-    bindings.append(int(device_memory))
-    if engine.binding_is_input(binding):
-      inputs.append(HostDeviceMemory(host_memory, device_memory))
-    else:
-      outputs.append(HostDeviceMemory(host_memory, device_memory))
-
-  return inputs, outputs, bindings, stream
+def allocate_buffers(engine, batch_size, data_type, insize, outsize):
+  """
+  Allocate buffers for input and output in the host and the device.
+  Args:
+    engine: the TensorRT engine (unused here; sizes are passed in explicitly).
+    batch_size: the batch size for execution time.
+    data_type: the type of the data for input and output, for example trt.float32.
+    insize: number of input elements per sample.
+    outsize: number of output elements per sample.
+  Output:
+    h_input_1: input in the host.
+    d_input_1: input in the device.
+    h_output: output in the host.
+    d_output: output in the device.
+    stream: CUDA stream.
+  """
+  # Determine dimensions and create page-locked memory buffers
+  # (which won't be swapped to disk) to hold host inputs/outputs.
+  h_input_1 = cuda.pagelocked_empty(batch_size * insize, dtype=trt.float32)
+  h_output = cuda.pagelocked_empty(batch_size * outsize, dtype=trt.float32)  # trt.nptype(data_type))
+  # Allocate device memory for inputs and outputs.
+  d_input_1 = cuda.mem_alloc(h_input_1.nbytes)
+  d_output = cuda.mem_alloc(h_output.nbytes)
+  # Create a stream in which to copy inputs/outputs and run inference.
+  stream = cuda.Stream()
+  return h_input_1, d_input_1, h_output, d_output, stream
 
 
 def infer(cls, context, bindings, inputs, outputs, stream, batch_size=1):
   # Transfer input data to the GPU.
@@ -66,43 +79,58 @@ def run_loop(tf_sess,input_tensor_name,output_tensor_name):
   isize = input_tensor.size
   osize = output_tensor.size
 
+def load_data_to_buffer(idata, pagelocked_buffer):
+  preprocessed = np.asarray(idata).ravel()
+  np.copyto(pagelocked_buffer, preprocessed)
+
 if __name__ == "__main__":
   print(tf.__version__, file=sys.stderr)
   model_file = Path(sys.argv[1])
-  uff_model = Path('%s/%s/%s.trt' % (model_file.parent.as_posix(),'trt',model_file.stem))
-  metadata_path = Path('%s/%s/%s.metadata' % (model_file.parent.as_posix(), 'trt', model_file.stem))
-  with open(metadata_path.as_posix(), 'r') as metadata, trt.Builder() as builder, builder.create_network() as network, trt.UffParser() as parser:
-    metadata = json.loads(metadata.read())
-    # Configure inputs and outputs
-    print('Configuring I/O')
-    input_names = metadata['input_names']
-    output_names = metadata['output_names']
-    for name in input_names:
-      parser.register_input(name, (self.cfg.TARGET_D, self.cfg.TARGET_H, self.cfg.TARGET_W))
-    for name in output_names:
-      parser.register_output(name)
-    # Parse network
-    print('Parsing TensorRT Network')
-    parser.parse(uff_model.as_posix(), network)
-    print('Building CUDA Engine')
-    engine = builder.build_cuda_engine(network)
-    # Allocate buffers
-    print('Allocating Buffers')
-    inputs, outputs, bindings, stream = allocate_buffers(engine)
-    print('Ready')
-
-  isize = inputs.size
-  osize = output.size
-  print("Ready to run keras model %d -> %d " % (isize,osize), file=sys.stderr)
+  onnx_model = Path('%s/%s.onnx' % (model_file.parent.as_posix(), model_file.stem))
+  print(" ONNX [%s] " % (onnx_model), file=sys.stderr)
+  logger = trt.Logger(trt.Logger.WARNING)
+  batch_size = 1
+
+  #model = ModelProto()
+  #with open(onnx_model, "rb") as f:
+  #  model.ParseFromString(f.read())
+  d0 = 394250  # model.graph.input[0].type.tensor_type.shape.dim[1].dim_value
+  d1 = 1       # model.graph.input[0].type.tensor_type.shape.dim[2].dim_value
+  d2 = 1       # model.graph.input[0].type.tensor_type.shape.dim[3].dim_value
+  o0 = 2895    # model.graph.output[0].type.tensor_type.shape.dim[1].dim_value
+  o1 = 1       # model.graph.output[0].type.tensor_type.shape.dim[2].dim_value
+  o2 = 1       # model.graph.output[0].type.tensor_type.shape.dim[3].dim_value
+  out_size = o0 * o1 * o2
+  shape = [batch_size, d0, d1, d2]
+  print(" Input shape ", shape, ' Output size ', out_size, file=sys.stderr)
+  trt.init_libnvinfer_plugins(logger, '')
+  builder = trt.Builder(logger)
+  network = builder.create_network()
+  print('Parsing TensorRT Network', file=sys.stderr)
+  parser = trt.OnnxParser(network, logger)
+  builder.max_workspace_size = (256 << 20)
+  with open(onnx_model, 'rb') as model:
+    parser.parse(model.read())
+  network.get_input(0).shape = shape
+  print('Building CUDA Engine', file=sys.stderr)
+  engine = builder.build_cuda_engine(network)
+  print('Allocating buffers', file=sys.stderr)
+  h_input_1, d_input_1, h_output, d_output, stream = allocate_buffers(engine, 1, trt.float32, d0, o0)
+  print("Ready to run TensorRT CUDA model", file=sys.stderr)
+  context = engine.create_execution_context()
   while 1:
     # check parent process, if ppid is 1, then modeld is no longer running and the runner should exit.
     if os.getppid() == 1:
       print("exiting due to Parent PID", file=sys.stderr)
       break
     idata = read(isize).reshape((1, isize))
-    with engine.create_execution_context() as context:
-      ret = infer(context=context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
+    load_data_to_buffer(idata, h_input_1)
+    cuda.memcpy_htod_async(d_input_1, h_input_1, stream)
+    context.execute(batch_size=1, bindings=[int(d_input_1), int(d_output)])
+    cuda.memcpy_dtoh_async(h_output, d_output, stream)
+    stream.synchronize()
+    ret = h_output.reshape((batch_size, -1, 1, out_size))
     write(ret)
+    # TODO: clean memory and free resources
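The hard-coded d0/o0 sizes above stand in for the commented-out ModelProto route. A hedged sketch of what that route would look like if re-enabled; the helper name and the assumption of a single input and output with static shapes are mine, not the commit's:

from onnx import ModelProto

def onnx_io_dims(onnx_path):
  # Read input/output tensor shapes from the ONNX graph (sketch only; assumes
  # one input and one output, both with static dimensions).
  model = ModelProto()
  with open(onnx_path, 'rb') as f:
    model.ParseFromString(f.read())
  in_dims = [d.dim_value for d in model.graph.input[0].type.tensor_type.shape.dim]
  out_dims = [d.dim_value for d in model.graph.output[0].type.tensor_type.shape.dim]
  return in_dims, out_dims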

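Taken together, the new runner builds an engine from ONNX once and then loops copy-in / execute / copy-out. A minimal, self-contained sketch of that flow, assuming the TensorRT 6/7-era Python API used above (trt.OnnxParser, builder.max_workspace_size, build_cuda_engine) and pycuda; the function names, the model path and the sizes in the usage comment are illustrative, not part of the commit:

import numpy as np
import pycuda.autoinit  # noqa: F401 -- creates a CUDA context on import
import pycuda.driver as cuda
import tensorrt as trt

LOGGER = trt.Logger(trt.Logger.WARNING)

def build_engine(onnx_path, input_shape, workspace=256 << 20):
  # Parse the ONNX file and build a CUDA engine (old-style implicit-batch API).
  builder = trt.Builder(LOGGER)
  network = builder.create_network()
  parser = trt.OnnxParser(network, LOGGER)
  builder.max_workspace_size = workspace
  with open(onnx_path, 'rb') as f:
    parser.parse(f.read())
  network.get_input(0).shape = input_shape
  return builder.build_cuda_engine(network)

def infer_once(engine, x, out_size):
  # Page-locked host buffers, device buffers, async copies on one stream, then a sync.
  h_in = cuda.pagelocked_empty(x.size, dtype=np.float32)
  h_out = cuda.pagelocked_empty(out_size, dtype=np.float32)
  d_in = cuda.mem_alloc(h_in.nbytes)
  d_out = cuda.mem_alloc(h_out.nbytes)
  stream = cuda.Stream()
  np.copyto(h_in, np.asarray(x, dtype=np.float32).ravel())
  with engine.create_execution_context() as context:
    cuda.memcpy_htod_async(d_in, h_in, stream)
    context.execute_async(batch_size=1, bindings=[int(d_in), int(d_out)], stream_handle=stream.handle)
    cuda.memcpy_dtoh_async(h_out, d_out, stream)
    stream.synchronize()
  return h_out.copy()

# Illustrative usage only (394250 inputs -> 2895 outputs, as hard-coded above):
# engine = build_engine('model.onnx', [1, 394250, 1, 1])
# y = infer_once(engine, np.zeros(394250, dtype=np.float32), 2895)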