Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Host] move sequence_expand #5685

Merged
merged 1 commit into from
Mar 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lite/kernels/arm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ add_kernel(density_prior_box_compute_arm ARM basic SRCS density_prior_box_comput
add_kernel(axpy_compute_arm ARM extra SRCS axpy_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(reduce_max_compute_arm ARM extra SRCS reduce_max_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(reduce_min_compute_arm ARM extra SRCS reduce_min_compute.cc DEPS ${lite_kernel_deps} math_arm)
# NOTE(review): the next two lines are the old/new pair of one diff hunk —
# the dependency of sequence_expand_compute_arm changes from math_arm to the
# new host kernel target sequence_expand_compute_host. Only the second line
# exists in the merged file.
add_kernel(sequence_expand_compute_arm ARM extra SRCS sequence_expand_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sequence_expand_compute_arm ARM extra SRCS sequence_expand_compute.cc DEPS ${lite_kernel_deps} sequence_expand_compute_host)
add_kernel(im2sequence_compute_arm ARM extra SRCS im2sequence_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sequence_pool_compute_arm ARM extra SRCS sequence_pool_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sequence_conv_compute_arm ARM extra SRCS sequence_conv_compute.cc DEPS ${lite_kernel_deps} math_arm)
Expand Down
110 changes: 2 additions & 108 deletions lite/kernels/arm/sequence_expand_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,119 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/arm/sequence_expand_compute.h"
#include <vector>
#include "lite/backends/arm/math/funcs.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace arm {

// Expands each sequence of `x` (delimited by offsets in `x_lod`) by the
// repeat count implied by consecutive offsets in `ref_lod`, writing the
// repeated rows into `out`. `out` must already be sized and, if it carries
// a LoD, that LoD determines where each expanded sequence starts.
// (This ARM copy is removed by this PR in favor of the host kernel.)
void SequenceExpandFunc(const Tensor& x,
const std::vector<uint64_t>& x_lod,
const std::vector<uint64_t>& ref_lod,
Tensor* out) {
// Running count of emitted repeats; doubles as an index into out->lod()[0].
uint64_t out_offset = 0;
// Number of elements in one row of x (x is treated as [dims()[0], rest]).
int64_t x_item_length = x.numel() / x.dims()[0];
auto out_data = out->mutable_data<float>();
auto x_data = x.data<float>();
for (size_t i = 1; i < ref_lod.size(); ++i) {
// How many times sequence i-1 of x is repeated.
uint64_t repeat_num = ref_lod[i] - ref_lod[i - 1];
uint64_t x_start = x_lod[i - 1];
uint64_t x_end = x_lod[i];
uint64_t x_seq_len = x_end - x_start;
if (repeat_num > 0) {
// Default destination row; overridden by out's LoD when present.
uint64_t out_start = out_offset;
if (out->lod().size() == 1) {
out_start = out->lod()[0][out_offset];
}
// Copy the source sequence `repeat_num` times, row by row.
for (uint64_t j = 0; j < repeat_num; j++) {
for (uint64_t k = 0; k < x_seq_len; k++) {
for (int l = 0; l < x_item_length; l++) {
out_data[(out_start + j * x_seq_len + k) * x_item_length + l] =
x_data[(x_start + k) * x_item_length + l];
}
}
}
}
out_offset += repeat_num;
}
}

// No one-time setup is required for this kernel.
void SequenceExpandCompute::PrepareForRun() {}

// Expands each sequence of param.X according to the LoD of param.Y at
// `ref_level`, writing to param.Out. ref_level == -1 selects Y's last LoD
// level. (This ARM copy is removed by this PR in favor of the host kernel.)
void SequenceExpandCompute::Run() {
// Dead legacy path kept commented out; it delegated to the ARM math impl.
/*
auto& param = Param<operators::SequenceExpandParam>();
const float* x_data = param.X->data<float>();
int width = param.X->numel() / param.X->dims()[0];
auto& output = param.Out;
const auto x_lod = param.X->lod();
const auto y_lod = param.Y->lod();
int ref_level = param.ref_level;
if (ref_level == -1) {
ref_level = y_lod.size() - 1;
}
lite::arm::math::SequenceExpandImpl(
x_data, x_lod, width, y_lod[ref_level], output);
*/
auto& param = Param<operators::SequenceExpandParam>();
auto* x = param.X;
auto* y = param.Y;
auto* out = param.Out;
int ref_level = param.ref_level;
auto x_lod = x->lod();
auto y_lod = y->lod();

if (ref_level == -1) ref_level = y_lod.size() - 1;

out->mutable_data<float>();
// A reference LoD with <= 1 offsets describes no sequences: nothing to
// expand, so Out is a plain copy of X.
if (y_lod[ref_level].size() <= 1) {
out->CopyDataFrom(*x);
return;
}

// When X has a single-level LoD, derive Out's LoD: each source sequence
// contributes `repeat_num` copies of its own length.
std::vector<uint64_t> out_lod;
if (x_lod.size() == 1) {
out_lod.push_back(0);
uint64_t out_offset = 0;
for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
uint64_t repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1];
uint64_t x_start = x_lod[0][i - 1];
uint64_t x_end = x_lod[0][i];
uint64_t x_seq_len = x_end - x_start;
for (uint64_t j = 0; j < repeat_num; ++j) {
out_lod.push_back(out_lod.back() + x_seq_len);
out_offset++;
}
}
// write lod to out if x has lod
auto& ref_lod = *out->mutable_lod();
ref_lod[0] = out_lod;
}

// Without an LoD on X, treat every row as a length-1 sequence
// (offsets 0, 1, ..., dims()[0]).
std::vector<uint64_t> ref_x_lod;
if (x->lod().size() == 1) {
ref_x_lod = x->lod()[0];
} else {
ref_x_lod.resize(x->dims()[0] + 1);
std::iota(ref_x_lod.begin(), ref_x_lod.end(), 0);
}

SequenceExpandFunc(*x, ref_x_lod, y_lod[ref_level], out);
}

} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
#include "lite/kernels/host/sequence_expand_compute.h"

REGISTER_LITE_KERNEL(sequence_expand,
kARM,
kFloat,
kNCHW,
paddle::lite::kernels::arm::SequenceExpandCompute,
paddle::lite::kernels::host::SequenceExpandCompute,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
Expand Down
1 change: 1 addition & 0 deletions lite/kernels/host/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ add_kernel(cos_compute_host Host extra SRCS cos_compute.cc DEPS ${lite_kernel_de
add_kernel(crop_compute_host Host extra SRCS crop_compute.cc DEPS ${lite_kernel_deps} math_host)
add_kernel(crop_tensor_compute_host Host extra SRCS crop_tensor_compute.cc DEPS ${lite_kernel_deps} math_host)
add_kernel(sequence_unpad_compute_host Host extra SRCS sequence_unpad_compute.cc DEPS ${lite_kernel_deps} math_host)
add_kernel(sequence_expand_compute_host Host extra SRCS sequence_expand_compute.cc DEPS ${lite_kernel_deps})
add_kernel(flatten_contiguous_range_compute_host Host extra SRCS flatten_compute.cc DEPS ${lite_kernel_deps})
add_kernel(shuffle_channel_compute_host Host extra SRCS shuffle_channel_compute.cc DEPS ${lite_kernel_deps} math_host)
add_kernel(activation_compute_host Host train SRCS activation_compute.cc DEPS ${lite_kernel_deps} math_host)
Expand Down
115 changes: 115 additions & 0 deletions lite/kernels/host/sequence_expand_compute.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/host/sequence_expand_compute.h"
#include <vector>

namespace paddle {
namespace lite {
namespace kernels {
namespace host {

void SequenceExpandFunc(const Tensor& x,
const std::vector<uint64_t>& x_lod,
const std::vector<uint64_t>& ref_lod,
Tensor* out) {
uint64_t out_offset = 0;
int64_t x_item_length = x.numel() / x.dims()[0];
auto out_data = out->mutable_data<float>();
auto x_data = x.data<float>();
for (size_t i = 1; i < ref_lod.size(); ++i) {
uint64_t repeat_num = ref_lod[i] - ref_lod[i - 1];
uint64_t x_start = x_lod[i - 1];
uint64_t x_end = x_lod[i];
uint64_t x_seq_len = x_end - x_start;
if (repeat_num > 0) {
uint64_t out_start = out_offset;
if (out->lod().size() == 1) {
out_start = out->lod()[0][out_offset];
}
for (uint64_t j = 0; j < repeat_num; j++) {
for (uint64_t k = 0; k < x_seq_len; k++) {
for (int l = 0; l < x_item_length; l++) {
out_data[(out_start + j * x_seq_len + k) * x_item_length + l] =
x_data[(x_start + k) * x_item_length + l];
}
}
}
}
out_offset += repeat_num;
}
}

// Expands each sequence of param.X according to the LoD of param.Y at
// `ref_level`, writing the result to param.Out. ref_level == -1 selects
// Y's last (finest) LoD level.
void SequenceExpandCompute::Run() {
  auto& param = Param<operators::SequenceExpandParam>();
  auto* x = param.X;
  auto* y = param.Y;
  auto* out = param.Out;
  int ref_level = param.ref_level;
  auto x_lod = x->lod();
  auto y_lod = y->lod();

  if (ref_level == -1) ref_level = y_lod.size() - 1;

  out->mutable_data<float>();
  // A reference LoD with <= 1 offsets describes no sequences: nothing to
  // expand, so Out is a plain copy of X.
  if (y_lod[ref_level].size() <= 1) {
    out->CopyDataFrom(*x);
    return;
  }

  // When X has a single-level LoD, derive Out's LoD first: each source
  // sequence contributes `repeat_num` copies of its own length. (The
  // original code also tracked an `out_offset` counter here; it was never
  // read, so it has been removed.)
  if (x_lod.size() == 1) {
    std::vector<uint64_t> out_lod;
    out_lod.push_back(0);
    for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
      uint64_t repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1];
      uint64_t x_seq_len = x_lod[0][i] - x_lod[0][i - 1];
      for (uint64_t j = 0; j < repeat_num; ++j) {
        out_lod.push_back(out_lod.back() + x_seq_len);
      }
    }
    // write lod to out if x has lod
    auto& ref_lod = *out->mutable_lod();
    ref_lod[0] = out_lod;
  }

  // Without an LoD on X, treat every row as a length-1 sequence
  // (offsets 0, 1, ..., dims()[0]).
  std::vector<uint64_t> ref_x_lod;
  if (x->lod().size() == 1) {
    ref_x_lod = x->lod()[0];
  } else {
    ref_x_lod.resize(x->dims()[0] + 1);
    // NOTE(review): std::iota lives in <numeric>, which this file does not
    // include directly — confirm it is pulled in transitively.
    std::iota(ref_x_lod.begin(), ref_x_lod.end(), 0);
  }

  SequenceExpandFunc(*x, ref_x_lod, y_lod[ref_level], out);
}

} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle

// Register the host-side sequence_expand kernel (float precision, NCHW
// layout); all bound tensors live on the host target.
REGISTER_LITE_KERNEL(sequence_expand,
kHost,
kFloat,
kNCHW,
paddle::lite::kernels::host::SequenceExpandCompute,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kHost))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
.Finalize();
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,17 @@
// NOTE(review): this span is a unified-diff overlay, not valid C++ as shown —
// both the removed ARM lines and the added Host lines appear together
// (namespace arm/host, and both KernelLite base-class lines). Only the kHost
// variants exist in the merged header.
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
namespace host {

// Host kernel declaration for the sequence_expand op; the implementation
// lives in sequence_expand_compute.cc.
class SequenceExpandCompute
// old (removed) base:
: public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
// new (added) base:
: public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
public:
void PrepareForRun() override;

void Run() override;

virtual ~SequenceExpandCompute() = default;

private:
};

} // namespace arm
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
13 changes: 7 additions & 6 deletions lite/tests/kernels/sequence_expand_compute_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -173,13 +173,14 @@ void test_sequence_expand(Place place) {
}

// NOTE(review): this span is a unified-diff overlay — the removed ARM-only
// place selection and the added Host place selection appear interleaved
// (hence two `place` declarations and two test_sequence_expand calls). Only
// the kHost variant exists in the merged test.
TEST(SequenceExpand, precision) {
// #ifdef LITE_WITH_X86
// Place place(TARGET(kX86));
// #endif
#ifdef LITE_WITH_ARM
// old (removed) path:
Place place(TARGET(kARM));
test_sequence_expand(place);
// new (added) path:
Place place;
#if defined(LITE_WITH_ARM) || defined(LITE_WITH_X86)
place = TARGET(kHost);
#else
return;
#endif

test_sequence_expand(place);
}

} // namespace lite
Expand Down