Skip to content
This repository has been archived by the owner on Dec 1, 2021. It is now read-only.

Commit

Permalink
Merge branch 'master' into kernel_initializer_issue
Browse files Browse the repository at this point in the history
  • Loading branch information
ananno authored Jan 28, 2020
2 parents 830feb5 + 825e4b9 commit 12cca0c
Show file tree
Hide file tree
Showing 16 changed files with 161 additions and 127 deletions.
6 changes: 1 addition & 5 deletions dlk/python/dlk/core/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ def pass_quantize_convolutions(graph: Graph) -> None:
width = qtz.width
depth = qtz.channel
depth_upper = (depth + b - 1) // b
qtz.update_shape([height, width, depth_upper, 2, b], "HWChBCl")
qtz.update_shape([depth_upper, height, width, 2, b], "ChHWBCl")


def pass_propagate_datatypes(graph) -> None:
Expand Down Expand Up @@ -530,10 +530,6 @@ def pass_propagate_format(graph) -> None:
b = 32
shape = [(m.channel + b - 1) // b, m.height, m.width, 2, b]
m.update_shape(shape, m.input_nodes[0].dimension)
elif m.input_nodes[0].dimension == 'HWChBCl':
b = 32
shape = [m.height, m.width, (m.channel + b - 1) // b, 2, b]
m.update_shape(shape, m.input_nodes[0].dimension)


def pass_propagate_output_type_backward(graph: Graph) -> None:
Expand Down
2 changes: 1 addition & 1 deletion dlk/python/dlk/templates/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ endif()

if(RUN_ON_FPGA)
list(APPEND SRC_LIB_ALL src/func/arm_neon/batch_normalization.cpp)
list(APPEND SRC_LIB_ALL src/func/impl/fpga/quantized_conv2d_kn2row.cpp)
list(APPEND SRC_LIB_ALL src/func/impl/fpga/quantized_conv2d_accelerator.cpp)
list(APPEND SRC_LIB_ALL src/func/impl/arm_neon/pop_count.cpp)
elseif(USE_NEON)
list(APPEND SRC_LIB_ALL src/func/arm_neon/batch_normalization.cpp)
Expand Down
2 changes: 1 addition & 1 deletion dlk/python/dlk/templates/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ LIB_ARM_OBJ := $(patsubst %.cpp, %.o, $(LIB_ARM_OBJ))

LIB_FPGA_SRC := $(wildcard $(SRC_DIR)/*.S) \
$(SRC_DIR)/func/arm_neon/batch_normalization.cpp \
$(SRC_DIR)/func/impl/fpga/quantized_conv2d_kn2row.cpp \
$(SRC_DIR)/func/impl/fpga/quantized_conv2d_accelerator.cpp \
$(SRC_DIR)/func/impl/arm_neon/pop_count.cpp
LIB_FPGA_OBJ := $(patsubst %.S, %.o, $(LIB_FPGA_SRC))
LIB_FPGA_OBJ := $(patsubst %.cpp, %.o, $(LIB_FPGA_OBJ))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/* Copyright 2018 The Blueoil Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Declaration of the FPGA-accelerated quantized 2D convolution entry point
// (TCA = the hardware convolution accelerator path, compiled only when
// RUN_ON_FPGA is defined).

#ifndef DLK_FUNC_IMPL_QUANTIZED_CONV2D_ACCELERATOR_H_INCLUDED
#define DLK_FUNC_IMPL_QUANTIZED_CONV2D_ACCELERATOR_H_INCLUDED

#include "types.h"
#include "operators.h" // FIXME(nikolay): for convolution_parameters definition, rid of it later
#include "tensor_view.h"

namespace dlk {

namespace impl {

#ifdef RUN_ON_FPGA
// Packed-quantized input tensor in ChHWBCl layout (channel-blocks first,
// then height/width, then bit-plane and packed-lane dimensions —
// presumably matching what the accelerator DMA expects; confirm against
// the TCAConv2d implementation).
using tca_input_t = TensorView<QUANTIZED_PACKED, MemoryLayout::ChHWBCl>;
// Packed-quantized kernel tensor in the accelerator's OhIhHWOlIl layout.
using tca_kernel_t = TensorView<QUANTIZED_PACKED_KERNEL, MemoryLayout::OhIhHWOlIl>;
// Runs one binary/quantized conv2d on the FPGA accelerator.
// `p` carries the convolution geometry and device buffer pointers
// (see binary_convolution_parameters in operators.h).
void TCAConv2d(const tca_input_t& input,
const tca_kernel_t& kernel,
const binary_convolution_parameters &p);
#endif // RUN_ON_FPGA

} // namespace impl

} // namespace dlk

#endif // DLK_FUNC_IMPL_QUANTIZED_CONV2D_ACCELERATOR_H_INCLUDED
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,22 @@ limitations under the License.
#ifndef DLK_FUNC_IMPL_QUANTIZED_CONV2D_KN2ROW_H_INCLUDED
#define DLK_FUNC_IMPL_QUANTIZED_CONV2D_KN2ROW_H_INCLUDED

#include "global.h"
#include "types.h"
#include "operators.h" // FIXME(nikolay): for convolution_parameters definition, rid of it later
#include "tensor_view.h"

namespace dlk {

namespace impl {

using kn2row_input_elem_t = QUANTIZED_PACKED;

#ifndef RUN_ON_FPGA
void convert_thresholds(BIN_CONV_OUTPUT *input, BIN_CONV_OUTPUT *output, std::size_t channels);
using kn2row_input_elem_t = QuantizedPacked<uint32_t>;
using kn2row_input_t = TensorView<kn2row_input_elem_t, MemoryLayout::HWChBCl>;
using kn2row_kernel_elem_t = QuantizedPacked<uint32_t>;
using kn2row_kernel_t = TensorView<kn2row_kernel_elem_t, MemoryLayout::HWOI>;
void QuantizedConv2DKn2Row(const kn2row_input_t& input,
const kernel_t& kernel,
const binary_convolution_parameters &p);
#else
using kn2row_input_t = TensorView<kn2row_input_elem_t, MemoryLayout::ChHWBCl>;
void TCAConv2d(const kn2row_input_t& input,
const kernel_t& kernel,
const kn2row_kernel_t& kernel,
const binary_convolution_parameters &p);
#endif

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ limitations under the License.
#ifndef DLK_FUNC_IMPL_QUANTIZED_CONV2D_TILING_H_INCLUDED
#define DLK_FUNC_IMPL_QUANTIZED_CONV2D_TILING_H_INCLUDED

#include "global.h"
#include "types.h"
#include "operators.h" // FIXME(nikolay): for binary_convolution_parameters definition, rid of it later
#include "tensor_view.h"

Expand All @@ -27,15 +27,18 @@ namespace impl {
using tiling_input_elem_base_t = uint32_t; // hardcoded, not configurable
using tiling_input_elem_t = QuantizedPacked<tiling_input_elem_base_t>;
using tiling_input_t = TensorView<tiling_input_elem_t, MemoryLayout::ChHWBCl>;
using tiling_kernel_elem_base_t = uint32_t; // hardcoded, not configurable
using tiling_kernel_elem_t = QuantizedPacked<tiling_kernel_elem_base_t>;
using tiling_kernel_t = TensorView<tiling_kernel_elem_t, MemoryLayout::OHWI>;

void pack_input_for_tiling(const TensorView<QUANTIZED_NOT_PACKED, MemoryLayout::NHWC>& input,
const tiling_input_t& output);

void convert_thresholds(BIN_CONV_OUTPUT *input, BIN_CONV_OUTPUT *output, std::size_t channels);

void QuantizedConv2DTiling(const tiling_input_t& input,
const kernel_t& kernel,
const binary_convolution_parameters &p);
const tiling_kernel_t& kernel,
const binary_convolution_parameters &p);

} // namespace impl

Expand Down
56 changes: 33 additions & 23 deletions dlk/python/dlk/templates/include/func/quantized_conv2d.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,23 @@ limitations under the License.
#include <memory>
#include <stdexcept>

#include "global.h"
#include "tensor_view.h"
#include "tensor_convert.h"
#include "operators.h"
#include "time_measurement.h"
#include "func/impl/quantized_conv2d_tiling.h"
#include "func/impl/quantized_conv2d_kn2row.h"
#include "func/impl/quantized_conv2d_accelerator.h"
#ifdef _OPENMP
#include <omp.h>
#endif

template <typename T, MemoryLayout layout>
void QuantizedConv2D(const TensorView<T, layout>& input,
const kernel_t& kernel,
template <typename T_input, MemoryLayout layout_input,
typename T_kernel, MemoryLayout layout_kernel>
void QuantizedConv2D(
const TensorView<QuantizedPacked<T_input>, layout_input>& input,
const TensorView<QuantizedPacked<T_kernel>, layout_kernel>& kernel,
binary_convolution_parameters p) {
Measurement::Start("QuantizedConv2D");

Expand All @@ -51,14 +55,14 @@ void QuantizedConv2D(const TensorView<T, layout>& input,
if ((kh == 3 && kw == 3 && padding == 1) ||
(kh == 1 && kw == 1 && padding == 0)) {
#ifdef RUN_ON_FPGA
dlk::impl::kn2row_input_t::tensor_info_t<std::size_t> shape = {
dlk::impl::tca_input_t::tensor_info_t<std::size_t> shape = {
(ic + QUANTIZED_PACKED::BitCount - 1) / QUANTIZED_PACKED::BitCount,
ih,
iw,
p.bin_input_bitwidth,
QUANTIZED_PACKED::BitCount
};
dlk::impl::kn2row_input_t tmp(p.device_input_buf, shape);
dlk::impl::tca_input_t tmp(p.device_input_buf, shape);
convert_tensor(input, tmp);
dlk::impl::TCAConv2d(tmp, kernel, p);
#elif defined USE_NEON || defined USE_AVX
Expand Down Expand Up @@ -91,10 +95,11 @@ void QuantizedConv2D(const TensorView<T, layout>& input,
Measurement::Stop();
}

template <typename T, MemoryLayout layout>
template <typename T_input, MemoryLayout layout_input,
typename T_kernel, MemoryLayout layout_kernel>
void func_QuantizedConv2D(
const TensorView<T, layout>& input,
const kernel_t& kernel,
const TensorView<QuantizedPacked<T_input>, layout_input>& input,
const TensorView<QuantizedPacked<T_kernel>, layout_kernel>& kernel,
const TensorView<T_FLOAT, MemoryLayout::NHWC>& output,
const T_FLOAT scaling_factor,
const binary_convolution_parameters& p) {
Expand Down Expand Up @@ -130,10 +135,11 @@ void func_QuantizedConv2D(

}

template <typename T, MemoryLayout layout>
template <typename T_input, MemoryLayout layout_input,
typename T_kernel, MemoryLayout layout_kernel>
void func_QuantizedConv2D(
const TensorView<T, layout>& input,
const kernel_t& kernel,
const TensorView<QuantizedPacked<T_input>, layout_input>& input,
const TensorView<QuantizedPacked<T_kernel>, layout_kernel>& kernel,
const TensorView<T_FLOAT, MemoryLayout::NHWC>& output,
T_FLOAT scaling_factor[],
binary_convolution_parameters p) {
Expand Down Expand Up @@ -167,10 +173,11 @@ void func_QuantizedConv2D(
Measurement::Stop();
}

template<typename T, MemoryLayout layout>
template <typename T_input, MemoryLayout layout_input,
typename T_kernel, MemoryLayout layout_kernel>
void func_QuantizedConv2DWithThreshold(
const TensorView<T, layout>& input,
const kernel_t& kernel,
const TensorView<QuantizedPacked<T_input>, layout_input>& input,
const TensorView<QuantizedPacked<T_kernel>, layout_kernel>& kernel,
const TensorView<QUANTIZED_PACKED, MemoryLayout::ChHWBCl>& output,
const T_FLOAT scaling_factor,
const binary_convolution_parameters& p) {
Expand Down Expand Up @@ -201,10 +208,11 @@ void func_QuantizedConv2DWithThreshold(
Measurement::Stop();
}

template <typename T, MemoryLayout layout>
template <typename T_input, MemoryLayout layout_input,
typename T_kernel, MemoryLayout layout_kernel>
void func_QuantizedConv2DWithThreshold(
const TensorView<T, layout>& input,
const kernel_t& kernel,
const TensorView<QuantizedPacked<T_input>, layout_input>& input,
const TensorView<QuantizedPacked<T_kernel>, layout_kernel>& kernel,
const TensorView<T_FLOAT, MemoryLayout::NHWC>& output,
const T_FLOAT scaling_factor,
const binary_convolution_parameters& p) {
Expand Down Expand Up @@ -238,21 +246,23 @@ void func_QuantizedConv2DWithThreshold(
Measurement::Stop();
}

template <typename T, MemoryLayout layout>
template <typename T_input, MemoryLayout layout_input,
typename T_kernel, MemoryLayout layout_kernel>
void func_QuantizedConv2DWithThreshold(
const TensorView<T, layout>& input,
const kernel_t& kernel,
const TensorView<QuantizedPacked<T_input>, layout_input>& input,
const TensorView<QuantizedPacked<T_kernel>, layout_kernel>& kernel,
const TensorView<QUANTIZED_PACKED, MemoryLayout::ChHWBCl>& output,
const T_FLOAT scaling_factor[],
const binary_convolution_parameters& p) {
func_QuantizedConv2DWithThreshold(input, kernel, output, scaling_factor[0],
p);
}

template <typename T, MemoryLayout layout>
template <typename T_input, MemoryLayout layout_input,
typename T_kernel, MemoryLayout layout_kernel>
void func_QuantizedConv2DWithThreshold(
const TensorView<T, layout>& input,
const kernel_t& kernel,
const TensorView<T_input, layout_input>& input,
const TensorView<T_kernel, layout_kernel>& kernel,
const TensorView<T_FLOAT, MemoryLayout::NHWC>& output,
T_FLOAT scaling_factor[],
binary_convolution_parameters p) {
Expand Down
2 changes: 1 addition & 1 deletion dlk/python/dlk/templates/include/pack_input_to_qwords.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ void pack_input_to_qwords(
struct binary_convolution_parameters bcp);


int pack_input(QUANTIZED_NOT_PACKED input[], size_t input_height, size_t input_width, size_t input_depth,
void pack_input(QUANTIZED_NOT_PACKED input[], size_t input_height, size_t input_width, size_t input_depth,
size_t bits_per_input, QUANTIZED_PACKED output[]);

#endif // DLK_PACK_INPUT_TO_QWORDS_H_INCLUDED
2 changes: 1 addition & 1 deletion dlk/python/dlk/templates/include/quantizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ void func_QTZ_linear_mid_tread_half(
const TensorView<T_FLOAT, MemoryLayout::NHWC>& input,
const TensorView<T_INT, MemoryLayout::Atom>& nbit,
const TensorView<T_FLOAT, MemoryLayout::Atom>& max_value,
const TensorView<QUANTIZED_PACKED, MemoryLayout::HWChBCl>& output,
const TensorView<QUANTIZED_PACKED, MemoryLayout::ChHWBCl>& output,
BYTE *temporary_buf);

void func_QTZ_linear_mid_tread_half(
Expand Down
10 changes: 1 addition & 9 deletions dlk/python/dlk/templates/include/tensor_view.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ limitations under the License.

#include <cassert>
#include <array>
#include "global.h"
#include "types.h"

enum class MemoryLayout {
Atom, // Scalar object
Expand Down Expand Up @@ -244,12 +244,4 @@ class TensorView<QuantizedPacked<T>, memory_layout> {
tensor_info_t<std::size_t> shape;
};

#ifdef RUN_ON_FPGA
using kernel_t = TensorView<QUANTIZED_PACKED_KERNEL, MemoryLayout::OhIhHWOlIl>;
#elif defined USE_NEON || defined USE_AVX
using kernel_t = TensorView<QUANTIZED_PACKED_KERNEL, MemoryLayout::OHWI>;
#else
using kernel_t = TensorView<QUANTIZED_PACKED_KERNEL, MemoryLayout::HWOI>;
#endif

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ void convert_thresholds(BIN_CONV_OUTPUT *input, BIN_CONV_OUTPUT *output, std::si
}

void QuantizedConv2DTiling(const tiling_input_t& input,
const kernel_t& kernel,
const binary_convolution_parameters &p) {
const tiling_kernel_t& kernel,
const binary_convolution_parameters &p) {
constexpr T_UINT InTypeBitWidth = tiling_input_elem_t::BitCount;
convolution_parameters cp = p.normal_conv_params;
const T_UINT out_channels = cp.output_channels;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ limitations under the License.
#include <cstdio>

#include "de10_nano.h"
#include "func/impl/quantized_conv2d_kn2row.h"
#include "func/impl/quantized_conv2d_accelerator.h"
#include "global.h"
#include "network.h"
#include "pack_input_to_qwords.h"
Expand All @@ -35,8 +35,8 @@ namespace dlk
namespace impl
{

void TCAConv2d(const kn2row_input_t& input,
const kernel_t& kernel,
void TCAConv2d(const tca_input_t& input,
const tca_kernel_t& kernel,
const binary_convolution_parameters &p) {

using namespace dlk;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ void convert_thresholds(BIN_CONV_OUTPUT *input, BIN_CONV_OUTPUT *output, std::si
}

void QuantizedConv2DKn2Row(const kn2row_input_t& input,
const kernel_t& kernel,
const binary_convolution_parameters &p) {
const kn2row_kernel_t& kernel,
const binary_convolution_parameters &p) {
using namespace dlk;

T_UINT ic = p.normal_conv_params.kernel_depth;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,8 @@ void convert_thresholds(BIN_CONV_OUTPUT *input, BIN_CONV_OUTPUT *output, std::si
}

void QuantizedConv2DTiling(const tiling_input_t& input,
const kernel_t& kernel,
const binary_convolution_parameters &p) {
const tiling_kernel_t& kernel,
const binary_convolution_parameters &p) {
constexpr std::size_t InTypeBitWidth = tiling_input_elem_t::BitCount;
convolution_parameters cp = p.normal_conv_params;
const std::size_t out_channels = cp.output_channels;
Expand Down
Loading

0 comments on commit 12cca0c

Please sign in to comment.