
Commit

continue working on CI
BogGyver committed Apr 22, 2020
1 parent 425cdc0 commit 9da8279
Showing 3 changed files with 81 additions and 49 deletions.
selfdrive/car/tesla/readconfig.py (3 changes: 2 additions & 1 deletion)
@@ -1,8 +1,9 @@
 import configparser
 from common.params import Params
 import subprocess
+from common.basedir import BASEDIR
 
-default_config_file_path = '/data/bb_openpilot.cfg'
+default_config_file_path = '%s/../bb_openpilot.cfg' % BASEDIR
 
 class ConfigFile():
   config_file_r = 'r'
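With the config path now derived from BASEDIR rather than hard-coded, a stock install still ends up with the same file: readconfig.sh falls back to BASEDIR=/data/openpilot, and /data/openpilot/../bb_openpilot.cfg is /data/bb_openpilot.cfg, while a checkout somewhere else (for example on a CI machine) keeps its config next to that checkout. A minimal sketch of the resolution; the helper and the example paths are illustrative, not part of the commit:

import os

def resolved_config_path(basedir):
  # Mirrors "'%s/../bb_openpilot.cfg' % BASEDIR" from readconfig.py (sketch only).
  return os.path.normpath(os.path.join(basedir, '..', 'bb_openpilot.cfg'))

assert resolved_config_path('/data/openpilot') == '/data/bb_openpilot.cfg'      # stock install
assert resolved_config_path('/tmp/ci/openpilot') == '/tmp/ci/bb_openpilot.cfg'  # hypothetical CI checkout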
selfdrive/car/tesla/readconfig.sh (5 changes: 4 additions & 1 deletion)
@@ -1,4 +1,7 @@
-CFG_FILE=/data/bb_openpilot.cfg
+if [ -z "$BASEDIR" ]; then
+  BASEDIR="/data/openpilot"
+fi
+CFG_FILE="$BASEDIR/../bb_openpilot.cfg"
 CFG_CONTENT=$(cat $CFG_FILE | sed -r "s/'/SINGLE_Q/" | sed -r '/[^=]+=[^=]+/!d' | sed -r 's/\s+=\s/=/g' | sed -e 's/[[:space:]]*\=[[:space:]]*/=/g' \
     -e 's/#.*$//' \
     -e 's/[[:space:]]*$//' \
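The sed pipeline above (cut off in this view) whitelists key=value lines and normalizes them before the script reads the config. A rough Python rendering of the visible steps; the function is illustrative only and not part of readconfig.sh:

import re

def normalize_cfg_line(line):
  line = line.replace("'", "SINGLE_Q", 1)   # sed "s/'/SINGLE_Q/"
  if not re.search(r"[^=]+=[^=]+", line):   # sed '/[^=]+=[^=]+/!d': drop lines that are not key=value
    return None
  line = re.sub(r"\s*=\s*", "=", line)      # collapse whitespace around '='
  line = re.sub(r"#.*$", "", line)          # strip trailing comments
  return line.rstrip()                      # strip trailing whitespace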
selfdrive/modeld/runners/tensorrt_runner.py (122 changes: 75 additions & 47 deletions)
@@ -11,27 +11,40 @@
 from pathlib import Path
 import tensorflow as tf
 import tensorrt as trt
+import argparse
+from onnx import ModelProto
 
 HostDeviceMemory = namedtuple('HostDeviceMemory', 'host_memory device_memory')
 
-def allocate_buffers(cls, engine):
-  inputs = []
-  outputs = []
-  bindings = []
-  stream = cuda.Stream()
-  for binding in engine:
-    size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
-    dtype = trt.nptype(engine.get_binding_dtype(binding))
-    # Allocate host and device buffers
-    host_memory = cuda.pagelocked_empty(size, dtype)
-    device_memory = cuda.mem_alloc(host_memory.nbytes)
-    bindings.append(int(device_memory))
-    if engine.binding_is_input(binding):
-      inputs.append(HostDeviceMemory(host_memory, device_memory))
-    else:
-      outputs.append(HostDeviceMemory(host_memory, device_memory))
-
-  return inputs, outputs, bindings, stream
+def allocate_buffers(engine, batch_size, data_type, insize, outsize):
+  """
+  Allocate buffers for input and output in the host and the device.
+  Args:
+    engine: the TensorRT engine (unused here; sizes are passed in explicitly).
+    batch_size: the batch size for execution time.
+    data_type: the type of the data for input and output, for example trt.float32.
+    insize: number of input elements per sample.
+    outsize: number of output elements per sample.
+  Output:
+    h_input_1: input in the host.
+    d_input_1: input in the device.
+    h_output: output in the host.
+    d_output: output in the device.
+    stream: CUDA stream.
+  """
+  # Determine dimensions and create page-locked memory buffers
+  # (which won't be swapped to disk) to hold host inputs/outputs.
+  h_input_1 = cuda.pagelocked_empty(batch_size * insize, dtype=trt.float32)
+  h_output = cuda.pagelocked_empty(batch_size * outsize, dtype=trt.float32)  # trt.nptype(data_type))
+  # Allocate device memory for inputs and outputs.
+  d_input_1 = cuda.mem_alloc(h_input_1.nbytes)
+  d_output = cuda.mem_alloc(h_output.nbytes)
+  # Create a stream in which to copy inputs/outputs and run inference.
+  stream = cuda.Stream()
+  return h_input_1, d_input_1, h_output, d_output, stream
 
 
 def infer(cls, context, bindings, inputs, outputs, stream, batch_size=1):
   # Transfer input data to the GPU.
@@ -66,43 +79,58 @@ def run_loop(tf_sess,input_tensor_name,output_tensor_name):
   isize = input_tensor.size
   osize = output_tensor.size
 
+def load_data_to_buffer(idata, pagelocked_buffer):
+  preprocessed = np.asarray(idata).ravel()
+  np.copyto(pagelocked_buffer, preprocessed)
+
 if __name__ == "__main__":
   print(tf.__version__, file=sys.stderr)
   model_file = Path(sys.argv[1])
-  uff_model = Path('%s/%s/%s.trt' % (model_file.parent.as_posix(),'trt',model_file.stem))
-  metadata_path = Path('%s/%s/%s.metadata' % (model_file.parent.as_posix(), 'trt', model_file.stem))
-  with open(metadata_path.as_posix(), 'r') as metadata, trt.Builder() as builder, builder.create_network() as network, trt.UffParser() as parser:
-    metadata = json.loads(metadata.read())
-    # Configure inputs and outputs
-    print('Configuring I/O')
-    input_names = metadata['input_names']
-    output_names = metadata['output_names']
-    for name in input_names:
-      parser.register_input(name, (self.cfg.TARGET_D, self.cfg.TARGET_H, self.cfg.TARGET_W))
-    for name in output_names:
-      parser.register_output(name)
-    # Parse network
-    print('Parsing TensorRT Network')
-    parser.parse(uff_model.as_posix(), network)
-    print('Building CUDA Engine')
-    engine = builder.build_cuda_engine(network)
-    # Allocate buffers
-    print('Allocating Buffers')
-    inputs, outputs, bindings, stream = allocate_buffers(engine)
-    print('Ready')
-
-  isize = inputs.size
-  osize = output.size
-  print("Ready to run keras model %d -> %d " % (isize,osize), file=sys.stderr)
+  onnx_model = Path('%s/%s.onnx' % (model_file.parent.as_posix(), model_file.stem))
+  print(" ONNX [%s] " % (onnx_model), file=sys.stderr)
+  logger = trt.Logger(trt.Logger.WARNING)
+  batch_size = 1
+
+  #model = ModelProto()
+  #with open(onnx_model, "rb") as f:
+  #  model.ParseFromString(f.read())
+  d0 = 394250  # model.graph.input[0].type.tensor_type.shape.dim[1].dim_value
+  d1 = 1       # model.graph.input[0].type.tensor_type.shape.dim[2].dim_value
+  d2 = 1       # model.graph.input[0].type.tensor_type.shape.dim[3].dim_value
+  o0 = 2895    # model.graph.output[0].type.tensor_type.shape.dim[1].dim_value
+  o1 = 1       # model.graph.output[0].type.tensor_type.shape.dim[2].dim_value
+  o2 = 1       # model.graph.output[0].type.tensor_type.shape.dim[3].dim_value
+  out_size = o0 * o1 * o2
+  shape = [batch_size, d0, d1, d2]
+  print(" Input shape ", shape, ' Output size ', out_size, file=sys.stderr)
+  trt.init_libnvinfer_plugins(logger, '')
+  builder = trt.Builder(logger)
+  network = builder.create_network()
+  print('Parsing TensorRT Network', file=sys.stderr)
+  parser = trt.OnnxParser(network, logger)
+  builder.max_workspace_size = (256 << 20)
+  with open(onnx_model, 'rb') as model:
+    parser.parse(model.read())
+  network.get_input(0).shape = shape
+  print('Building CUDA Engine', file=sys.stderr)
+  engine = builder.build_cuda_engine(network)
+  print('Allocating buffers', file=sys.stderr)
+  h_input_1, d_input_1, h_output, d_output, stream = allocate_buffers(engine, 1, trt.float32, d0, o0)
+  print("Ready to run TensorRT CUDA model", file=sys.stderr)
+  context = engine.create_execution_context()
   while 1:
     # check parent process, if ppid is 1, then modeld is no longer running and the runner should exit.
     if os.getppid() == 1:
       print("exiting due to Parent PID", file=sys.stderr)
       break
     idata = read(isize).reshape((1, isize))
-    with engine.create_execution_context() as context:
-      ret = infer(context=context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
+    load_data_to_buffer(idata, h_input_1)
+    cuda.memcpy_htod_async(d_input_1, h_input_1, stream)
+    context.execute(batch_size=1, bindings=[int(d_input_1), int(d_output)])
+    cuda.memcpy_dtoh_async(h_output, d_output, stream)
+    stream.synchronize()
+    ret = h_output.reshape((batch_size, -1, 1, out_size))
     write(ret)
+    # TODO: clean memory and free resources
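The hard-coded d0/o0 sizes above stand in for the commented-out ModelProto route. A hedged sketch of what that route would look like if re-enabled; the helper name and the assumption of a single input and output with static shapes are mine, not the commit's:

from onnx import ModelProto

def onnx_io_dims(onnx_path):
  # Read input/output tensor shapes from the ONNX graph (sketch only; assumes
  # one input and one output, both with static dimensions).
  model = ModelProto()
  with open(onnx_path, 'rb') as f:
    model.ParseFromString(f.read())
  in_dims = [d.dim_value for d in model.graph.input[0].type.tensor_type.shape.dim]
  out_dims = [d.dim_value for d in model.graph.output[0].type.tensor_type.shape.dim]
  return in_dims, out_dims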

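Taken together, the new runner builds an engine from ONNX once and then loops copy-in / execute / copy-out. A minimal, self-contained sketch of that flow, assuming the TensorRT 6/7-era Python API used above (trt.OnnxParser, builder.max_workspace_size, build_cuda_engine) and pycuda; the function names, the model path and the sizes in the usage comment are illustrative, not part of the commit:

import numpy as np
import pycuda.autoinit  # noqa: F401 -- creates a CUDA context on import
import pycuda.driver as cuda
import tensorrt as trt

LOGGER = trt.Logger(trt.Logger.WARNING)

def build_engine(onnx_path, input_shape, workspace=256 << 20):
  # Parse the ONNX file and build a CUDA engine (old-style implicit-batch API).
  builder = trt.Builder(LOGGER)
  network = builder.create_network()
  parser = trt.OnnxParser(network, LOGGER)
  builder.max_workspace_size = workspace
  with open(onnx_path, 'rb') as f:
    parser.parse(f.read())
  network.get_input(0).shape = input_shape
  return builder.build_cuda_engine(network)

def infer_once(engine, x, out_size):
  # Page-locked host buffers, device buffers, async copies on one stream, then a sync.
  h_in = cuda.pagelocked_empty(x.size, dtype=np.float32)
  h_out = cuda.pagelocked_empty(out_size, dtype=np.float32)
  d_in = cuda.mem_alloc(h_in.nbytes)
  d_out = cuda.mem_alloc(h_out.nbytes)
  stream = cuda.Stream()
  np.copyto(h_in, np.asarray(x, dtype=np.float32).ravel())
  with engine.create_execution_context() as context:
    cuda.memcpy_htod_async(d_in, h_in, stream)
    context.execute_async(batch_size=1, bindings=[int(d_in), int(d_out)], stream_handle=stream.handle)
    cuda.memcpy_dtoh_async(h_out, d_out, stream)
    stream.synchronize()
  return h_out.copy()

# Illustrative usage only (394250 inputs -> 2895 outputs, as hard-coded above):
# engine = build_engine('model.onnx', [1, 394250, 1, 1])
# y = infer_once(engine, np.zeros(394250, dtype=np.float32), 2895)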