add elementwise_mod on x86, increment on host #5472

Merged 4 commits on Feb 7, 2021
1 change: 0 additions & 1 deletion lite/backends/arm/math/CMakeLists.txt
@@ -127,7 +127,6 @@ if (NOT HAS_ARM_MATH_LIB_DIR)
sequence_softmax.cc
norm.cc
topk.cc
-increment.cc
pad2d.cc
negative.cc
beam_search.cc
1 change: 0 additions & 1 deletion lite/backends/arm/math/funcs.h
@@ -40,7 +40,6 @@
#include "lite/backends/arm/math/gemm_s8.h"
#include "lite/backends/arm/math/gemv_arm_int8.h"
#include "lite/backends/arm/math/im2sequence.h"
#include "lite/backends/arm/math/increment.h"
#include "lite/backends/arm/math/interpolate.h"
#include "lite/backends/arm/math/layout.h"
#include "lite/backends/arm/math/lrn.h"
26 changes: 0 additions & 26 deletions lite/backends/arm/math/increment.cc

This file was deleted.

38 changes: 0 additions & 38 deletions lite/backends/arm/math/increment.h

This file was deleted.

2 changes: 1 addition & 1 deletion lite/kernels/arm/CMakeLists.txt
@@ -96,7 +96,7 @@ add_kernel(lookup_table_compute_arm ARM extra SRCS lookup_table_compute.cc DEPS
add_kernel(lookup_table_dequant_compute_arm ARM extra SRCS lookup_table_dequant_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sequence_softmax_compute_arm ARM extra SRCS sequence_softmax_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(topk_compute_arm ARM extra SRCS topk_compute.cc DEPS ${lite_kernel_deps} math_arm)
-add_kernel(increment_compute_arm ARM extra SRCS increment_compute.cc DEPS ${lite_kernel_deps} math_arm)
+add_kernel(increment_compute_arm ARM extra SRCS increment_compute.cc DEPS ${lite_kernel_deps} increment_compute_host)
add_kernel(beam_search_compute_arm ARM extra SRCS beam_search_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(lod_reset_compute_arm ARM extra SRCS lod_reset_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(lstm_arm ARM extra SRCS lstm_compute.cc DEPS ${lite_kernel_deps} math_arm)
38 changes: 2 additions & 36 deletions lite/kernels/arm/increment_compute.cc
@@ -12,47 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/arm/increment_compute.h"
#include "lite/backends/arm/math/funcs.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace arm {

void IncrementCompute::Run() {
auto& ctx = this->ctx_->template As<ARMContext>();
auto& param = this->Param<operators::IncrementParam>();

int total_num = param.X->dims().production();
if (param.X->precision() == PRECISION(kFloat)) {
const auto* x_data = param.X->data<float>();
auto* o_data = param.Out->mutable_data<float>();
lite::arm::math::increment(x_data, total_num, param.step, o_data, &ctx);
} else if (param.X->precision() == PRECISION(kInt64)) {
const auto* x_data = param.X->data<int64_t>();
auto* o_data = param.Out->mutable_data<int64_t>();
lite::arm::math::increment(x_data, total_num, param.step, o_data, &ctx);
} else if (param.X->precision() == PRECISION(kInt32)) {
const auto* x_data = param.X->data<int32_t>();
auto* o_data = param.Out->mutable_data<int32_t>();
lite::arm::math::increment(x_data, total_num, param.step, o_data, &ctx);
} else {
LOG(FATAL) << "unsupport input type "
<< PrecisionToStr(param.X->precision());
}
}

} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
#include "lite/kernels/host/increment_compute.h"

REGISTER_LITE_KERNEL(increment,
kARM,
kAny,
kNCHW,
-paddle::lite::kernels::arm::IncrementCompute,
+paddle::lite::kernels::host::IncrementCompute,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny))})
1 change: 1 addition & 0 deletions lite/kernels/host/CMakeLists.txt
@@ -53,6 +53,7 @@ add_kernel(box_coder_compute_host Host basic SRCS box_coder_compute.cc DEPS ${li
add_kernel(gather_compute_host Host extra SRCS gather_compute.cc DEPS ${lite_kernel_deps} math_host)
add_kernel(gather_nd_compute_host Host extra SRCS gather_nd_compute.cc DEPS ${lite_kernel_deps})
add_kernel(gather_tree_compute_host Host extra SRCS gather_tree_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(increment_compute_host Host extra SRCS increment_compute.cc DEPS ${lite_kernel_deps})
add_kernel(pad3d_compute_host Host extra SRCS pad3d_compute.cc DEPS ${lite_kernel_deps} math_host)
add_kernel(select_input_compute_host Host extra SRCS select_input_compute.cc DEPS ${lite_kernel_deps} math_host)
add_kernel(tensor_array_to_tensor_compute_host Host extra SRCS tensor_array_to_tensor_compute.cc DEPS ${lite_kernel_deps} math_host)
18 changes: 9 additions & 9 deletions lite/kernels/host/expand_compute.cc
@@ -80,7 +80,7 @@ void ExpandCompute<T, PType>::Run() {

using expand_float =
paddle::lite::kernels::host::ExpandCompute<float, PRECISION(kFloat)>;
-REGISTER_LITE_KERNEL(expand, kHost, kFloat, kAny, expand_float, def)
+REGISTER_LITE_KERNEL(expand, kHost, kFloat, kAny, expand_float, float32)
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kFloat),
@@ -90,18 +90,18 @@ REGISTER_LITE_KERNEL(expand, kHost, kFloat, kAny, expand_float, def)
PRECISION(kInt32),
DATALAYOUT(kAny))})
.BindInput("expand_times_tensor",
-{LiteType::GetTensorListTy(TARGET(kHost),
-PRECISION(kInt32),
-DATALAYOUT(kAny))})
+{LiteType::GetTensorTy(TARGET(kHost),
+PRECISION(kInt32),
+DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kFloat),
DATALAYOUT(kAny))})
.Finalize();

using expand_int32 =
-paddle::lite::kernels::host::ExpandCompute<int, PRECISION(kInt32)>;
-REGISTER_LITE_KERNEL(expand, kHost, kInt32, kAny, expand_int32, def)
+paddle::lite::kernels::host::ExpandCompute<int, PRECISION(kFloat)>;
+REGISTER_LITE_KERNEL(expand, kHost, kFloat, kAny, expand_int32, int32)
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kInt32),
@@ -111,9 +111,9 @@ REGISTER_LITE_KERNEL(expand, kHost, kInt32, kAny, expand_int32, def)
PRECISION(kInt32),
DATALAYOUT(kAny))})
.BindInput("expand_times_tensor",
-{LiteType::GetTensorListTy(TARGET(kHost),
-PRECISION(kInt32),
-DATALAYOUT(kAny))})
+{LiteType::GetTensorTy(TARGET(kHost),
+PRECISION(kInt32),
+DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kInt32),
74 changes: 74 additions & 0 deletions lite/kernels/host/increment_compute.cc
@@ -0,0 +1,74 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/host/increment_compute.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace host {

template <class T>
void increment(const T* input, const int n, const T step, T* out) {
  for (int i = 0; i < n; i++) {
    out[i] = input[i] + step;
  }
}

void IncrementCompute::Run() {
  auto& param = this->Param<operators::IncrementParam>();

  int total_num = param.X->numel();
  switch (param.X->precision()) {
    case PRECISION(kFloat): {
      const auto* x_data = param.X->data<float>();
      auto* o_data = param.Out->mutable_data<float>();
      float step = static_cast<float>(param.step);
      increment(x_data, total_num, step, o_data);
      break;
    }
    case PRECISION(kInt64): {
      const auto* x_data = param.X->data<int64_t>();
      auto* o_data = param.Out->mutable_data<int64_t>();
      int64_t step = static_cast<int64_t>(param.step);
      increment(x_data, total_num, step, o_data);
      break;
    }
    case PRECISION(kInt32): {
      const auto* x_data = param.X->data<int32_t>();
      auto* o_data = param.Out->mutable_data<int32_t>();
      int32_t step = static_cast<int32_t>(param.step);
      increment(x_data, total_num, step, o_data);
      break;
    }
    default:
      LOG(FATAL) << "unsupport input type "
                 << PrecisionToStr(param.X->precision());
  }
}

} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle

REGISTER_LITE_KERNEL(increment,
kHost,
kAny,
kNCHW,
paddle::lite::kernels::host::IncrementCompute,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kAny))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kAny))})
.Finalize();
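For reference, a minimal standalone sketch (not part of the PR) of the elementwise update the new host kernel performs, out[i] = x[i] + step. The increment loop is copied from the kernel above; the main driver, the sample tensor, and the step value are illustrative only.

#include <cstdint>
#include <iostream>
#include <vector>

// Same elementwise update as the host kernel above: out[i] = input[i] + step.
template <class T>
void increment(const T* input, const int n, const T step, T* out) {
  for (int i = 0; i < n; i++) {
    out[i] = input[i] + step;
  }
}

int main() {
  // Paddle's increment op is typically applied to a one-element tensor.
  std::vector<int64_t> x = {41};
  std::vector<int64_t> y(x.size());
  increment(x.data(), static_cast<int>(x.size()), static_cast<int64_t>(1), y.data());
  std::cout << y[0] << std::endl;  // prints 42
  return 0;
}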
lite/kernels/arm/increment_compute.h → lite/kernels/host/increment_compute.h
@@ -13,17 +13,15 @@
// limitations under the License.

#pragma once
-#include <stdint.h>
-#include "lite/backends/arm/math/type_trans.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace kernels {
-namespace arm {
+namespace host {

-class IncrementCompute : public KernelLite<TARGET(kARM), PRECISION(kAny)> {
+class IncrementCompute : public KernelLite<TARGET(kHost), PRECISION(kAny)> {
public:
void Run() override;

@@ -32,7 +30,7 @@ class IncrementCompute : public KernelLite<TARGET(kARM), PRECISION(kAny)> {
private:
};

-} // namespace arm
+} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
22 changes: 22 additions & 0 deletions lite/kernels/x86/elementwise_compute.cc
@@ -82,3 +82,25 @@ REGISTER_LITE_KERNEL(
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
.Finalize();

REGISTER_LITE_KERNEL(elementwise_mod,
kX86,
kFloat,
kNCHW,
paddle::lite::kernels::x86::ElementwiseModCompute<int32_t>,
int32)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt32))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt32))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt32))})
.Finalize();

REGISTER_LITE_KERNEL(elementwise_mod,
kX86,
kFloat,
kNCHW,
paddle::lite::kernels::x86::ElementwiseModCompute<int64_t>,
int64)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
.Finalize();
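Both registrations above reuse ElementwiseModCompute<T> together with the ModFunctor added in elementwise_compute.h (next file). The functor starts from C++'s truncating % and shifts a nonzero remainder so it takes the sign of the divisor, i.e. a floored (Python-style) modulo. Below is a minimal standalone sketch of that behavior; the flooring_mod name and the sample values are illustrative only, not part of the PR.

#include <cstdint>
#include <iostream>

// Same adjustment as ModFunctor: compute the truncating %, then shift a
// nonzero remainder so it takes the sign of the divisor.
template <typename T>
T flooring_mod(T a, T b) {
  T res = a % b;
  if ((res != 0) && ((res < 0) != (b < 0))) res += b;
  return res;
}

int main() {
  std::cout << flooring_mod<int32_t>(7, 3) << "\n";   // 1
  std::cout << flooring_mod<int32_t>(-7, 3) << "\n";  // 2 (plain % gives -1)
  std::cout << flooring_mod<int64_t>(7, -3) << "\n";  // -2
  return 0;
}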
25 changes: 25 additions & 0 deletions lite/kernels/x86/elementwise_compute.h
@@ -45,6 +45,15 @@ struct FloorDivFunctor {
}
};

template <typename T>
struct ModFunctor {
  inline HOSTDEVICE T operator()(T a, T b) const {
    T res = a % b;
    if ((res != 0) && ((res < 0) != (b < 0))) res += b;
    return res;
  }
};

template <typename T>
class ElementwiseSubCompute
: public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
@@ -117,6 +126,22 @@ class ElementwiseFloorDivCompute
virtual ~ElementwiseFloorDivCompute() = default;
};

template <typename T>
class ElementwiseModCompute
    : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  using param_t = operators::ElementwiseParam;
  void Run() override {
    auto& param = *param_.get_mutable<param_t>();
    auto& context = ctx_->As<X86Context>();
    param.Out->template mutable_data<T>();
    ElementwiseComputeEx<ModFunctor<T>, lite::TargetType::kX86, T>(
        context, param.X, param.Y, param.axis, ModFunctor<T>(), param.Out);
  }

  virtual ~ElementwiseModCompute() = default;
};

} // namespace x86
} // namespace kernels
} // namespace lite
2 changes: 1 addition & 1 deletion lite/kernels/xpu/fill_constant_compute.cc
@@ -87,5 +87,5 @@ REGISTER_LITE_KERNEL(fill_constant,
.BindInput("ShapeTensorList",
{LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kAny))})
.BindPaddleOpVersion("fill_constant", 1)
.BindPaddleOpVersion("fill_constant", 2)
.Finalize();