add elementwise_mod on x86, increment on host; test=develop
zhupengyang committed Feb 5, 2021
1 parent 36afd0b commit b536030
Showing 14 changed files with 186 additions and 129 deletions.
1 change: 0 additions & 1 deletion lite/backends/arm/math/CMakeLists.txt
@@ -127,7 +127,6 @@ if (NOT HAS_ARM_MATH_LIB_DIR)
sequence_softmax.cc
norm.cc
topk.cc
-increment.cc
pad2d.cc
negative.cc
beam_search.cc
1 change: 0 additions & 1 deletion lite/backends/arm/math/funcs.h
@@ -40,7 +40,6 @@
#include "lite/backends/arm/math/gemm_s8.h"
#include "lite/backends/arm/math/gemv_arm_int8.h"
#include "lite/backends/arm/math/im2sequence.h"
#include "lite/backends/arm/math/increment.h"
#include "lite/backends/arm/math/interpolate.h"
#include "lite/backends/arm/math/layout.h"
#include "lite/backends/arm/math/lrn.h"
26 changes: 0 additions & 26 deletions lite/backends/arm/math/increment.cc

This file was deleted.

38 changes: 0 additions & 38 deletions lite/backends/arm/math/increment.h

This file was deleted.

2 changes: 1 addition & 1 deletion lite/kernels/arm/CMakeLists.txt
@@ -96,7 +96,7 @@ add_kernel(lookup_table_compute_arm ARM extra SRCS lookup_table_compute.cc DEPS
add_kernel(lookup_table_dequant_compute_arm ARM extra SRCS lookup_table_dequant_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sequence_softmax_compute_arm ARM extra SRCS sequence_softmax_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(topk_compute_arm ARM extra SRCS topk_compute.cc DEPS ${lite_kernel_deps} math_arm)
-add_kernel(increment_compute_arm ARM extra SRCS increment_compute.cc DEPS ${lite_kernel_deps} math_arm)
+add_kernel(increment_compute_arm ARM extra SRCS increment_compute.cc DEPS ${lite_kernel_deps} increment_compute_host)
add_kernel(beam_search_compute_arm ARM extra SRCS beam_search_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(lod_reset_compute_arm ARM extra SRCS lod_reset_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(lstm_arm ARM extra SRCS lstm_compute.cc DEPS ${lite_kernel_deps} math_arm)
38 changes: 2 additions & 36 deletions lite/kernels/arm/increment_compute.cc
@@ -12,47 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/arm/increment_compute.h"
#include "lite/backends/arm/math/funcs.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace arm {

void IncrementCompute::Run() {
auto& ctx = this->ctx_->template As<ARMContext>();
auto& param = this->Param<operators::IncrementParam>();

int total_num = param.X->dims().production();
if (param.X->precision() == PRECISION(kFloat)) {
const auto* x_data = param.X->data<float>();
auto* o_data = param.Out->mutable_data<float>();
lite::arm::math::increment(x_data, total_num, param.step, o_data, &ctx);
} else if (param.X->precision() == PRECISION(kInt64)) {
const auto* x_data = param.X->data<int64_t>();
auto* o_data = param.Out->mutable_data<int64_t>();
lite::arm::math::increment(x_data, total_num, param.step, o_data, &ctx);
} else if (param.X->precision() == PRECISION(kInt32)) {
const auto* x_data = param.X->data<int32_t>();
auto* o_data = param.Out->mutable_data<int32_t>();
lite::arm::math::increment(x_data, total_num, param.step, o_data, &ctx);
} else {
LOG(FATAL) << "unsupport input type "
<< PrecisionToStr(param.X->precision());
}
}

} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
#include "lite/kernels/host/increment_compute.h"

REGISTER_LITE_KERNEL(increment,
kARM,
kAny,
kNCHW,
paddle::lite::kernels::arm::IncrementCompute,
paddle::lite::kernels::host::IncrementCompute,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny))})
1 change: 1 addition & 0 deletions lite/kernels/host/CMakeLists.txt
@@ -53,6 +53,7 @@ add_kernel(box_coder_compute_host Host basic SRCS box_coder_compute.cc DEPS ${li
add_kernel(gather_compute_host Host extra SRCS gather_compute.cc DEPS ${lite_kernel_deps} math_host)
add_kernel(gather_nd_compute_host Host extra SRCS gather_nd_compute.cc DEPS ${lite_kernel_deps})
add_kernel(gather_tree_compute_host Host extra SRCS gather_tree_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(increment_compute_host Host extra SRCS increment_compute.cc DEPS ${lite_kernel_deps})
add_kernel(pad3d_compute_host Host extra SRCS pad3d_compute.cc DEPS ${lite_kernel_deps} math_host)
add_kernel(select_input_compute_host Host extra SRCS select_input_compute.cc DEPS ${lite_kernel_deps} math_host)
add_kernel(tensor_array_to_tensor_compute_host Host extra SRCS tensor_array_to_tensor_compute.cc DEPS ${lite_kernel_deps} math_host)
6 changes: 3 additions & 3 deletions lite/kernels/host/expand_compute.cc
@@ -80,7 +80,7 @@ void ExpandCompute<T, PType>::Run() {

using expand_float =
paddle::lite::kernels::host::ExpandCompute<float, PRECISION(kFloat)>;
-REGISTER_LITE_KERNEL(expand, kHost, kFloat, kAny, expand_float, def)
+REGISTER_LITE_KERNEL(expand, kHost, kFloat, kAny, expand_float, float32)
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kFloat),
@@ -100,8 +100,8 @@ REGISTER_LITE_KERNEL(expand, kHost, kFloat, kAny, expand_float, def)
.Finalize();

using expand_int32 =
-    paddle::lite::kernels::host::ExpandCompute<int, PRECISION(kInt32)>;
-REGISTER_LITE_KERNEL(expand, kHost, kInt32, kAny, expand_int32, def)
+    paddle::lite::kernels::host::ExpandCompute<int, PRECISION(kFloat)>;
+REGISTER_LITE_KERNEL(expand, kHost, kFloat, kAny, expand_int32, int32)
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kInt32),
74 changes: 74 additions & 0 deletions lite/kernels/host/increment_compute.cc
@@ -0,0 +1,74 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/host/increment_compute.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace host {

template <class T>
void increment(const T* input, const int n, const T step, T* out) {
  for (int i = 0; i < n; i++) {
    out[i] = input[i] + step;
  }
}

void IncrementCompute::Run() {
  auto& param = this->Param<operators::IncrementParam>();

  int total_num = param.X->numel();
  switch (param.X->precision()) {
    case PRECISION(kFloat): {
      const auto* x_data = param.X->data<float>();
      auto* o_data = param.Out->mutable_data<float>();
      float step = static_cast<float>(param.step);
      increment(x_data, total_num, step, o_data);
      break;
    }
    case PRECISION(kInt64): {
      const auto* x_data = param.X->data<int64_t>();
      auto* o_data = param.Out->mutable_data<int64_t>();
      int64_t step = static_cast<int64_t>(param.step);
      increment(x_data, total_num, step, o_data);
      break;
    }
    case PRECISION(kInt32): {
      const auto* x_data = param.X->data<int32_t>();
      auto* o_data = param.Out->mutable_data<int32_t>();
      int32_t step = static_cast<int32_t>(param.step);
      increment(x_data, total_num, step, o_data);
      break;
    }
    default:
      LOG(FATAL) << "unsupport input type "
                 << PrecisionToStr(param.X->precision());
  }
}

}  // namespace host
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_LITE_KERNEL(increment,
                     kHost,
                     kAny,
                     kNCHW,
                     paddle::lite::kernels::host::IncrementCompute,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kAny))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kAny))})
    .Finalize();
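
The new host kernel above keeps the increment logic framework-neutral: it dispatches on the input tensor's runtime precision and adds the scalar step element-wise, with no dependency on the ARM math library. A minimal standalone sketch of the same semantics (plain C++, no Lite types; the helper and variable names here are illustrative only):

#include <cstdint>
#include <iostream>
#include <vector>

// Same element-wise rule as the kernel's increment<T> helper: out[i] = in[i] + step.
template <class T>
std::vector<T> increment_ref(const std::vector<T>& in, T step) {
  std::vector<T> out(in.size());
  for (size_t i = 0; i < in.size(); ++i) out[i] = in[i] + step;
  return out;
}

int main() {
  // e.g. bumping a one-element int64 loop counter by 1
  std::vector<int64_t> counter{7};
  std::cout << increment_ref<int64_t>(counter, 1)[0] << std::endl;  // prints 8
  return 0;
}
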
lite/kernels/{arm => host}/increment_compute.h
@@ -13,17 +13,15 @@
// limitations under the License.

#pragma once
#include <stdint.h>
#include "lite/backends/arm/math/type_trans.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace kernels {
-namespace arm {
+namespace host {

-class IncrementCompute : public KernelLite<TARGET(kARM), PRECISION(kAny)> {
+class IncrementCompute : public KernelLite<TARGET(kHost), PRECISION(kAny)> {
public:
void Run() override;

@@ -32,7 +30,7 @@ class IncrementCompute : public KernelLite<TARGET(kARM), PRECISION(kAny)> {
private:
};

-}  // namespace arm
+}  // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
22 changes: 22 additions & 0 deletions lite/kernels/x86/elementwise_compute.cc
@@ -82,3 +82,25 @@ REGISTER_LITE_KERNEL(
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
.Finalize();

REGISTER_LITE_KERNEL(elementwise_mod,
                     kX86,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::x86::ElementwiseModCompute<int32_t>,
                     int32)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt32))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt32))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt32))})
    .Finalize();

REGISTER_LITE_KERNEL(elementwise_mod,
                     kX86,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::x86::ElementwiseModCompute<int64_t>,
                     int64)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
    .Finalize();
25 changes: 25 additions & 0 deletions lite/kernels/x86/elementwise_compute.h
@@ -45,6 +45,15 @@ struct FloorDivFunctor {
}
};

template <typename T>
struct ModFunctor {
  inline HOSTDEVICE T operator()(T a, T b) const {
    T res = a % b;
    if ((res != 0) && ((res < 0) != (b < 0))) res += b;
    return res;
  }
};

template <typename T>
class ElementwiseSubCompute
: public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
@@ -117,6 +126,22 @@ class ElementwiseFloorDivCompute
virtual ~ElementwiseFloorDivCompute() = default;
};

template <typename T>
class ElementwiseModCompute
    : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  using param_t = operators::ElementwiseParam;
  void Run() override {
    auto& param = *param_.get_mutable<param_t>();
    auto& context = ctx_->As<X86Context>();
    param.Out->template mutable_data<T>();
    ElementwiseComputeEx<ModFunctor<T>, lite::TargetType::kX86, T>(
        context, param.X, param.Y, param.axis, ModFunctor<T>(), param.Out);
  }

  virtual ~ElementwiseModCompute() = default;
};

} // namespace x86
} // namespace kernels
} // namespace lite
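
The sign fix-up in the new ModFunctor above makes the remainder take the divisor's sign (floored modulo, as in Python's %), rather than C++'s truncated %. A small standalone check of that behavior, independent of the kernel framework (the function name here is illustrative):

#include <cassert>
#include <cstdint>

// Same rule as ModFunctor: take the truncated remainder, then shift it onto b's sign.
template <typename T>
T floored_mod(T a, T b) {
  T res = a % b;
  if ((res != 0) && ((res < 0) != (b < 0))) res += b;
  return res;
}

int main() {
  assert(floored_mod<int32_t>(7, 3) == 1);     // same as plain %
  assert(floored_mod<int32_t>(-7, 3) == 2);    // plain % would give -1
  assert(floored_mod<int32_t>(7, -3) == -2);   // plain % would give 1
  assert(floored_mod<int64_t>(-7, -3) == -1);  // same as plain %
  return 0;
}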