fix: transform the data from CPU to GPU when TRT is used (#37427)

PaddlePaddle · Nov 24, 2021 · 49366a6 · 49366a6
1 parent be3b774
commit 49366a6
Showing 1 changed file with 8 additions and 0 deletions.
diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -24,6 +24,7 @@
 #include <utility>
 #include <vector>
 
+#include "paddle/fluid/framework/data_device_transform.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
@@ -421,6 +422,13 @@ class TensorRTEngineOp : public framework::OperatorBase {
       // convert input and copy to TRT engine's buffer
       auto &t =
           inference::analysis::GetFromScope<framework::LoDTensor>(scope, x);
+      // check the input_tensor
+      if (!platform::is_gpu_place(t.place())) {
+        framework::Tensor out;
+        platform::CUDAPlace dst_place;
+        framework::TransDataDevice(t, dst_place, &out);
+        t.ShareDataWith(out);
+      }
       auto t_shape = framework::vectorize<int64_t>(t.dims());
       const int bind_index = engine->engine()->getBindingIndex(x.c_str());
       PADDLE_ENFORCE_LT(