Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Host] move sequence_expand #5685

Merged
merged 1 commit into from
Mar 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lite/kernels/arm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ add_kernel(density_prior_box_compute_arm ARM basic SRCS density_prior_box_comput
add_kernel(axpy_compute_arm ARM extra SRCS axpy_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(reduce_max_compute_arm ARM extra SRCS reduce_max_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(reduce_min_compute_arm ARM extra SRCS reduce_min_compute.cc DEPS ${lite_kernel_deps} math_arm)
# NOTE(review): the next two lines are the old/new pair of one diff hunk —
# the dependency of sequence_expand_compute_arm changes from math_arm to the
# new host kernel target sequence_expand_compute_host. Only the second line
# exists in the merged file.
add_kernel(sequence_expand_compute_arm ARM extra SRCS sequence_expand_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sequence_expand_compute_arm ARM extra SRCS sequence_expand_compute.cc DEPS ${lite_kernel_deps} sequence_expand_compute_host)
add_kernel(im2sequence_compute_arm ARM extra SRCS im2sequence_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sequence_pool_compute_arm ARM extra SRCS sequence_pool_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sequence_conv_compute_arm ARM extra SRCS sequence_conv_compute.cc DEPS ${lite_kernel_deps} math_arm)
Expand Down
110 changes: 2 additions & 108 deletions lite/kernels/arm/sequence_expand_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,119 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/arm/sequence_expand_compute.h"
#include <vector>
#include "lite/backends/arm/math/funcs.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace arm {

// Expands each sequence of `x` (delimited by offsets in `x_lod`) by the
// repeat count implied by consecutive offsets in `ref_lod`, writing the
// repeated rows into `out`. `out` must already be sized and, if it carries
// a LoD, that LoD determines where each expanded sequence starts.
// (This ARM copy is removed by this PR in favor of the host kernel.)
void SequenceExpandFunc(const Tensor& x,
const std::vector<uint64_t>& x_lod,
const std::vector<uint64_t>& ref_lod,
Tensor* out) {
// Running count of emitted repeats; doubles as an index into out->lod()[0].
uint64_t out_offset = 0;
// Number of elements in one row of x (x is treated as [dims()[0], rest]).
int64_t x_item_length = x.numel() / x.dims()[0];
auto out_data = out->mutable_data<float>();
auto x_data = x.data<float>();
for (size_t i = 1; i < ref_lod.size(); ++i) {
// How many times sequence i-1 of x is repeated.
uint64_t repeat_num = ref_lod[i] - ref_lod[i - 1];
uint64_t x_start = x_lod[i - 1];
uint64_t x_end = x_lod[i];
uint64_t x_seq_len = x_end - x_start;
if (repeat_num > 0) {
// Default destination row; overridden by out's LoD when present.
uint64_t out_start = out_offset;
if (out->lod().size() == 1) {
out_start = out->lod()[0][out_offset];
}
// Copy the source sequence `repeat_num` times, row by row.
for (uint64_t j = 0; j < repeat_num; j++) {
for (uint64_t k = 0; k < x_seq_len; k++) {
for (int l = 0; l < x_item_length; l++) {
out_data[(out_start + j * x_seq_len + k) * x_item_length + l] =
x_data[(x_start + k) * x_item_length + l];
}
}
}
}
out_offset += repeat_num;
}
}

// No one-time setup is required for this kernel.
void SequenceExpandCompute::PrepareForRun() {}

// Expands each sequence of param.X according to the LoD of param.Y at
// `ref_level`, writing to param.Out. ref_level == -1 selects Y's last LoD
// level. (This ARM copy is removed by this PR in favor of the host kernel.)
void SequenceExpandCompute::Run() {
// Dead legacy path kept commented out; it delegated to the ARM math impl.
/*
auto& param = Param<operators::SequenceExpandParam>();
const float* x_data = param.X->data<float>();
int width = param.X->numel() / param.X->dims()[0];
auto& output = param.Out;
const auto x_lod = param.X->lod();
const auto y_lod = param.Y->lod();
int ref_level = param.ref_level;
if (ref_level == -1) {
ref_level = y_lod.size() - 1;
}
lite::arm::math::SequenceExpandImpl(
x_data, x_lod, width, y_lod[ref_level], output);
*/
auto& param = Param<operators::SequenceExpandParam>();
auto* x = param.X;
auto* y = param.Y;
auto* out = param.Out;
int ref_level = param.ref_level;
auto x_lod = x->lod();
auto y_lod = y->lod();

if (ref_level == -1) ref_level = y_lod.size() - 1;

out->mutable_data<float>();
// A reference LoD with <= 1 offsets describes no sequences: nothing to
// expand, so Out is a plain copy of X.
if (y_lod[ref_level].size() <= 1) {
out->CopyDataFrom(*x);
return;
}

// When X has a single-level LoD, derive Out's LoD: each source sequence
// contributes `repeat_num` copies of its own length.
std::vector<uint64_t> out_lod;
if (x_lod.size() == 1) {
out_lod.push_back(0);
uint64_t out_offset = 0;
for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
uint64_t repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1];
uint64_t x_start = x_lod[0][i - 1];
uint64_t x_end = x_lod[0][i];
uint64_t x_seq_len = x_end - x_start;
for (uint64_t j = 0; j < repeat_num; ++j) {
out_lod.push_back(out_lod.back() + x_seq_len);
out_offset++;
}
}
// write lod to out if x has lod
auto& ref_lod = *out->mutable_lod();
ref_lod[0] = out_lod;
}

// Without an LoD on X, treat every row as a length-1 sequence
// (offsets 0, 1, ..., dims()[0]).
std::vector<uint64_t> ref_x_lod;
if (x->lod().size() == 1) {
ref_x_lod = x->lod()[0];
} else {
ref_x_lod.resize(x->dims()[0] + 1);
std::iota(ref_x_lod.begin(), ref_x_lod.end(), 0);
}

SequenceExpandFunc(*x, ref_x_lod, y_lod[ref_level], out);
}

} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
#include "lite/kernels/host/sequence_expand_compute.h"

REGISTER_LITE_KERNEL(sequence_expand,
kARM,
kFloat,
kNCHW,
paddle::lite::kernels::arm::SequenceExpandCompute,
paddle::lite::kernels::host::SequenceExpandCompute,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
Expand Down
1 change: 1 addition & 0 deletions lite/kernels/host/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ add_kernel(cos_compute_host Host extra SRCS cos_compute.cc DEPS ${lite_kernel_de
add_kernel(crop_compute_host Host extra SRCS crop_compute.cc DEPS ${lite_kernel_deps} math_host)
add_kernel(crop_tensor_compute_host Host extra SRCS crop_tensor_compute.cc DEPS ${lite_kernel_deps} math_host)
add_kernel(sequence_unpad_compute_host Host extra SRCS sequence_unpad_compute.cc DEPS ${lite_kernel_deps} math_host)
add_kernel(sequence_expand_compute_host Host extra SRCS sequence_expand_compute.cc DEPS ${lite_kernel_deps})
add_kernel(flatten_contiguous_range_compute_host Host extra SRCS flatten_compute.cc DEPS ${lite_kernel_deps})
add_kernel(shuffle_channel_compute_host Host extra SRCS shuffle_channel_compute.cc DEPS ${lite_kernel_deps} math_host)
add_kernel(activation_compute_host Host train SRCS activation_compute.cc DEPS ${lite_kernel_deps} math_host)
Expand Down
115 changes: 115 additions & 0 deletions lite/kernels/host/sequence_expand_compute.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/host/sequence_expand_compute.h"
#include <vector>

namespace paddle {
namespace lite {
namespace kernels {
namespace host {

void SequenceExpandFunc(const Tensor& x,
const std::vector<uint64_t>& x_lod,
const std::vector<uint64_t>& ref_lod,
Tensor* out) {
uint64_t out_offset = 0;
int64_t x_item_length = x.numel() / x.dims()[0];
auto out_data = out->mutable_data<float>();
auto x_data = x.data<float>();
for (size_t i = 1; i < ref_lod.size(); ++i) {
uint64_t repeat_num = ref_lod[i] - ref_lod[i - 1];
uint64_t x_start = x_lod[i - 1];
uint64_t x_end = x_lod[i];
uint64_t x_seq_len = x_end - x_start;
if (repeat_num > 0) {
uint64_t out_start = out_offset;
if (out->lod().size() == 1) {
out_start = out->lod()[0][out_offset];
}
for (uint64_t j = 0; j < repeat_num; j++) {
for (uint64_t k = 0; k < x_seq_len; k++) {
for (int l = 0; l < x_item_length; l++) {
out_data[(out_start + j * x_seq_len + k) * x_item_length + l] =
x_data[(x_start + k) * x_item_length + l];
}
}
}
}
out_offset += repeat_num;
}
}

// Expands each sequence of param.X according to the LoD of param.Y at
// `ref_level`, writing the result to param.Out. ref_level == -1 selects
// Y's last (finest) LoD level.
void SequenceExpandCompute::Run() {
  auto& param = Param<operators::SequenceExpandParam>();
  auto* x = param.X;
  auto* y = param.Y;
  auto* out = param.Out;
  int ref_level = param.ref_level;
  auto x_lod = x->lod();
  auto y_lod = y->lod();

  if (ref_level == -1) ref_level = y_lod.size() - 1;

  out->mutable_data<float>();
  // A reference LoD with <= 1 offsets describes no sequences: nothing to
  // expand, so Out is a plain copy of X.
  if (y_lod[ref_level].size() <= 1) {
    out->CopyDataFrom(*x);
    return;
  }

  // When X has a single-level LoD, derive Out's LoD first: each source
  // sequence contributes `repeat_num` copies of its own length. (The
  // original code also tracked an `out_offset` counter here; it was never
  // read, so it has been removed.)
  if (x_lod.size() == 1) {
    std::vector<uint64_t> out_lod;
    out_lod.push_back(0);
    for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
      uint64_t repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1];
      uint64_t x_seq_len = x_lod[0][i] - x_lod[0][i - 1];
      for (uint64_t j = 0; j < repeat_num; ++j) {
        out_lod.push_back(out_lod.back() + x_seq_len);
      }
    }
    // write lod to out if x has lod
    auto& ref_lod = *out->mutable_lod();
    ref_lod[0] = out_lod;
  }

  // Without an LoD on X, treat every row as a length-1 sequence
  // (offsets 0, 1, ..., dims()[0]).
  std::vector<uint64_t> ref_x_lod;
  if (x->lod().size() == 1) {
    ref_x_lod = x->lod()[0];
  } else {
    ref_x_lod.resize(x->dims()[0] + 1);
    // NOTE(review): std::iota lives in <numeric>, which this file does not
    // include directly — confirm it is pulled in transitively.
    std::iota(ref_x_lod.begin(), ref_x_lod.end(), 0);
  }

  SequenceExpandFunc(*x, ref_x_lod, y_lod[ref_level], out);
}

} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle

// Register the host-side sequence_expand kernel (float precision, NCHW
// layout); all bound tensors live on the host target.
REGISTER_LITE_KERNEL(sequence_expand,
kHost,
kFloat,
kNCHW,
paddle::lite::kernels::host::SequenceExpandCompute,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kHost))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
.Finalize();
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,17 @@
// NOTE(review): this span is a unified-diff overlay, not valid C++ as shown —
// both the removed ARM lines and the added Host lines appear together
// (namespace arm/host, and both KernelLite base-class lines). Only the kHost
// variants exist in the merged header.
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
namespace host {

// Host kernel declaration for the sequence_expand op; the implementation
// lives in sequence_expand_compute.cc.
class SequenceExpandCompute
// old (removed) base:
: public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
// new (added) base:
: public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
public:
void PrepareForRun() override;

void Run() override;

virtual ~SequenceExpandCompute() = default;

private:
};

} // namespace arm
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
13 changes: 7 additions & 6 deletions lite/tests/kernels/sequence_expand_compute_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -173,13 +173,14 @@ void test_sequence_expand(Place place) {
}

// NOTE(review): this span is a unified-diff overlay — the removed ARM-only
// place selection and the added Host place selection appear interleaved
// (hence two `place` declarations and two test_sequence_expand calls). Only
// the kHost variant exists in the merged test.
TEST(SequenceExpand, precision) {
// #ifdef LITE_WITH_X86
// Place place(TARGET(kX86));
// #endif
#ifdef LITE_WITH_ARM
// old (removed) path:
Place place(TARGET(kARM));
test_sequence_expand(place);
// new (added) path:
Place place;
#if defined(LITE_WITH_ARM) || defined(LITE_WITH_X86)
place = TARGET(kHost);
#else
return;
#endif

test_sequence_expand(place);
}

} // namespace lite
Expand Down