This repository has been archived by the owner on Dec 1, 2021. It is now read-only.

Merge branch 'master' into rename_QTZ_linear_mid_tread_half
Joeper214 authored Mar 25, 2020
2 parents 7867a33 + 969633c commit f4d6879
Showing 50 changed files with 211 additions and 139 deletions.
16 changes: 15 additions & 1 deletion Makefile
@@ -10,6 +10,20 @@ deps:
# Update dependencies
git submodule update --init --recursive

.PHONY: install
install: deps
pip install -e .[cpu,tests,docs]
pip install pycocotools==2.0.0

.PHONY: install-gpu
install-gpu: install
pip install -e .[gpu]
pip install -e .[dist]

.PHONY: lint
lint:
flake8 ./blueoil ./tests --exclude=templates,converter

.PHONY: build
build: deps
# Build docker image
@@ -46,7 +60,7 @@ test-lmnet: test-blueoil-pep8 test-unit-main
test-blueoil-pep8: build
# Check blueoil pep8
# FIXME: blueoil/templates and blueoil/converter have a lot of errors with flake8
docker run ${DOCKER_OPT} $(IMAGE_NAME):$(BUILD_VERSION) /bin/bash -c "flake8 ./blueoil --exclude=templates,converter"
docker run ${DOCKER_OPT} $(IMAGE_NAME):$(BUILD_VERSION) /bin/bash -c "flake8 ./blueoil ./tests --exclude=templates,converter"

.PHONY: test-unit-main
test-unit-main: build
7 changes: 0 additions & 7 deletions blueoil/cmd/convert.py
@@ -121,15 +121,8 @@ def make_all(project_dir, output_dir):
# Change current directory to project directory
os.chdir(project_dir)

cxxflags_cache = os.getenv("CXXFLAGS", "")

# Make each target and move output files
for target, output in make_list:
if target in {"lm_x86", "lm_x86_avx", "lm_arm", "lm_fpga", "lm_aarch64"}:
os.environ["CXXFLAGS"] = cxxflags_cache + " -DFUNC_TIME_MEASUREMENT"
else:
os.environ["CXXFLAGS"] = cxxflags_cache

subprocess.run(("make", "clean", "--quiet"))
subprocess.run(("make", target, "-j4", "--quiet"))
strip_binary(output)
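
The block removed above appended -DFUNC_TIME_MEASUREMENT to CXXFLAGS for the lm_* targets; that define now lives in the generated converter Makefile (see the blueoil/converter/templates/Makefile hunk later in this commit), so the Python side no longer mutates os.environ. A minimal standalone sketch, using a hypothetical helper that is not part of this diff, of how per-invocation flags could still be passed to make without touching the global environment:

import os
import subprocess

def run_make(target, extra_cxxflags=""):
    # Copy the current environment and add flags only for this invocation.
    env = dict(os.environ)
    if extra_cxxflags:
        env["CXXFLAGS"] = (env.get("CXXFLAGS", "") + " " + extra_cxxflags).strip()
    subprocess.run(("make", "clean", "--quiet"), env=env, check=True)
    subprocess.run(("make", target, "-j4", "--quiet"), env=env, check=True)
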
31 changes: 17 additions & 14 deletions blueoil/cmd/output_event.py
@@ -34,7 +34,7 @@ def _value_step_list(event_accumulator, metrics_key):
events = event_accumulator.Scalars(metrics_key)
return [(event.value, event.step) for event in events]
except KeyError as e:
print("Key {} was not found in {}".format(metrics_key, event_accumulator.path))
print("Key {} was not found in {}\n{}".format(metrics_key, event_accumulator.path, e))
return []


@@ -55,14 +55,16 @@ def output(tensorboard_dir, output_dir, metrics_keys, steps, output_file_base="m
event_accumulators.append(event_accumulator)

if not metrics_keys:
metrics_keys = {metrics_key
for event_accumulator in event_accumulators
for metrics_key in _get_metrics_keys(event_accumulator)}
metrics_keys = {
metrics_key
for event_accumulator in event_accumulators
for metrics_key in _get_metrics_keys(event_accumulator)
}

columns = [_column_name(event_accumulator, metrics_key)
for event_accumulator, metrics_key in itertools.product(event_accumulators, metrics_keys)]
columns.sort()
df = pd.DataFrame([], columns=columns)
df = pd.DataFrame([], columns=columns)

for event_accumulator in event_accumulators:
for metrics_key in metrics_keys:
@@ -102,13 +104,8 @@ def output(tensorboard_dir, output_dir, metrics_keys, steps, output_file_base="m
print(message)


@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option(
"-i",
"--experiment_id",
help="id of target experiment",
required=True,
)
@click.command(context_settings=dict(help_option_names=["-h", "--help"]))
@click.option("-i", "--experiment_id", help="id of target experiment", required=True)
@click.option(
"-k",
"--metrics_keys",
@@ -135,8 +132,14 @@ def output(tensorboard_dir, output_dir, metrics_keys, steps, output_file_base="m
def main(output_file_base, metrics_keys, steps, experiment_id):
environment.init(experiment_id)

output(environment.TENSORBOARD_DIR, environment.EXPERIMENT_DIR, metrics_keys, steps, output_file_base="metrics",)
output(
environment.TENSORBOARD_DIR,
environment.EXPERIMENT_DIR,
metrics_keys,
steps,
output_file_base="metrics",
)


if __name__ == '__main__':
if __name__ == "__main__":
main()
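
Beyond the reformatting, output() keeps the positional signature shown in the hunk headers above, so it can be called directly as well as through the click CLI. A minimal sketch with an illustrative experiment id and metrics key, assuming the environment module used above is importable as blueoil.environment:

from blueoil import environment
from blueoil.cmd.output_event import output

environment.init("my_experiment")  # illustrative experiment id
output(
    environment.TENSORBOARD_DIR,
    environment.EXPERIMENT_DIR,
    ["accuracy"],  # metrics_keys; an empty value makes output() collect every key it finds
    [],            # steps
    output_file_base="metrics",
)
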
2 changes: 1 addition & 1 deletion blueoil/cmd/train.py
@@ -395,7 +395,7 @@ def train(config_file, experiment_id=None, recreate=False):
if not tf.io.gfile.exists(checkpoint):
raise Exception('Checkpoints are not created in {}'.format(experiment_dir))

with open(checkpoint) as stream:
with tf.io.gfile.GFile(checkpoint) as stream:
data = yaml.load(stream)
checkpoint_name = os.path.basename(data['model_checkpoint_path'])

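
Replacing the built-in open() with tf.io.gfile.GFile reads the checkpoint state file through TensorFlow's file-system layer, which also handles non-local paths (for example gs:// buckets) that open() cannot. A minimal standalone sketch of the new read path; the path is illustrative and yaml.safe_load stands in for the yaml.load call in the diff:

import os
import tensorflow as tf
import yaml

checkpoint = "saved/my_experiment/checkpoints/checkpoint"  # illustrative path
with tf.io.gfile.GFile(checkpoint) as stream:
    data = yaml.safe_load(stream)  # the TF checkpoint state file parses as YAML
checkpoint_name = os.path.basename(data["model_checkpoint_path"])
print(checkpoint_name)
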
2 changes: 1 addition & 1 deletion blueoil/converter/core/operators.py
@@ -2562,7 +2562,7 @@ def __init__(self,
def _check_consistency(self) -> None:
"""
This check the following constraints:
1. qunatized-packed data requires depth of input must be multiple of kernel_size^2 * 32
1. quantized-packed data requires depth of input must be multiple of kernel_size^2 * 32
"""
super()._check_consistency()
if self.input_ops['input'].op_type == 'LinearMidTreadHalfQuantizer' and \
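
The corrected docstring states the constraint _check_consistency enforces: for quantized-packed input, the input depth must be a multiple of kernel_size^2 * 32. A minimal standalone sketch of that rule, using a hypothetical helper rather than the converter's actual method:

def check_quantized_packed_depth(depth, kernel_size, word_size=32):
    """Raise if depth is not a multiple of kernel_size^2 * word_size."""
    multiple = kernel_size ** 2 * word_size
    if depth % multiple != 0:
        raise ValueError(
            "depth {} must be a multiple of {} (kernel_size^2 * {})".format(
                depth, multiple, word_size))

check_quantized_packed_depth(depth=288, kernel_size=3)  # 3 * 3 * 32 == 288, so this passes
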
41 changes: 22 additions & 19 deletions blueoil/converter/core/optimizer.py
@@ -277,41 +277,44 @@ def pass_compute_thresholds(graph: Graph) -> None:
# The threshold_table is numpy array that holds the threshold values for all channels
threshold_table = np.empty([ch, n + 1], dtype=np.int32)

# Compute increasing order or decreasing order
is_inc_order = [True for i in range(ch)]
if quantizer_conv_weights.op_type == 'BinaryChannelWiseMeanScalingQuantizer':
for c in range(ch):
is_inc_order[c] = scaling_factor[c] > 0
else:
for c in range(ch):
is_inc_order[c] = scaling_factor > 0

for op in p[:-1]:
if op.op_type == 'BatchNormalization':
bn_scale = op.input_ops['scale'].data
for c in range(ch):
if bn_scale[c] < 0:
is_inc_order[c] = not is_inc_order[c]

for c in range(ch):
threshold_table[c, -1] = 1 \
if is_inc_order[c] else -1

# Compute threshold (t0, t1, t2)
th_val = [0.5 + i for i in range(n)]
for th_id, th_v in enumerate(th_val):
init_threshold = np.full(ch, th_v, dtype=np.float64)

# run calculation in reverse order, for example, q -> bn -> scaling
bn_nega_idx = []
trans_th = {'data': init_threshold}
for op in p[:-1]:
trans_th = op.de_run(**trans_th)
if op.op_type == 'BatchNormalization':
bn_scale = op.input_ops['scale'].data
bn_nega_idx = [v for v in range(len(bn_scale)) if bn_scale[v] < 0]
threshold = (trans_th['data'] * np.float64(n)) / (np.float64(max_v) * scaling_factor)

# take care of threshold values that are larger than 13-bit signed integer
threshold[threshold > max_th_value] = max_th_value
threshold[threshold < -max_th_value] = -max_th_value

for ch_id, th_per_ch in enumerate(threshold):
if quantizer_conv_weights.op_type == 'BinaryChannelWiseMeanScalingQuantizer':
threshold_table[ch_id, th_id] = int(math.floor(th_per_ch)) \
if (scaling_factor[ch_id] < 0) ^ (ch_id in bn_nega_idx) \
else int(math.ceil(th_per_ch))
else:
threshold_table[ch_id, th_id] = int(math.floor(th_per_ch)) \
if (scaling_factor < 0) ^ (ch_id in bn_nega_idx) \
else int(math.ceil(th_per_ch))

for c in range(ch):
threshold_table[c, -1] = 1 \
if np.all(threshold_table[c, 1:-1] > threshold_table[c, :-2], axis=0) else -1
if np.all(threshold_table[c, 1:-1] == threshold_table[c, :-2], axis=0):
threshold_table[c, -1] = 1
threshold_table[c, 0:-1] = max_th_value
threshold_table[ch_id, th_id] = int(math.ceil(th_per_ch)) \
if is_inc_order[ch_id] else int(math.floor(th_per_ch))

bits_per_word = 32
rem = (bits_per_word - ch % bits_per_word) % bits_per_word
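
The rewritten pass replaces the after-the-fact monotonicity check on the finished table with an up-front is_inc_order flag: a channel's thresholds increase when its weight-quantizer scaling factor is positive, and every negative BatchNormalization scale along the path flips that direction; the flag then selects ceil or floor for rounding and fills the table's last column with +1 or -1. A minimal numpy sketch of that bookkeeping, with made-up per-channel values and a single BatchNormalization:

import numpy as np

scaling_factor = np.array([0.8, -1.2, 0.5])  # per-channel quantizer scale (illustrative)
bn_scale = np.array([1.0, 2.0, -0.3])        # per-channel BatchNorm scale (illustrative)

is_inc_order = scaling_factor > 0
is_inc_order ^= bn_scale < 0                 # a negative BN scale flips the direction

order_flag = np.where(is_inc_order, 1, -1)   # written to the last column of threshold_table
print(order_flag)                            # [ 1 -1 -1]
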
8 changes: 4 additions & 4 deletions blueoil/converter/templates/Makefile
@@ -116,19 +116,19 @@ clean:
-$(RM) $(OBJ)

lm_x86: CXX = g++
lm_x86: FLAGS += $(INCLUDES) -O3 -std=c++14 -DUSE_PNG -pthread -g
lm_x86: FLAGS += $(INCLUDES) -O3 -std=c++14 -DUSE_PNG -pthread -g -DFUNC_TIME_MEASUREMENT
lm_x86: CXXFLAGS +=

lm_x86_avx: CXX = g++
lm_x86_avx: FLAGS += $(INCLUDES) -O3 -std=c++14 -mavx2 -mfma -DUSE_AVX -DUSE_PNG -pthread -g -fopenmp
lm_x86_avx: FLAGS += $(INCLUDES) -O3 -std=c++14 -mavx2 -mfma -DUSE_AVX -DUSE_PNG -pthread -g -fopenmp -DFUNC_TIME_MEASUREMENT
lm_x86_avx: CXXFLAGS +=

lm_aarch64: CXX = aarch64-linux-gnu-g++
lm_aarch64: FLAGS += $(INCLUDES) -std=c++14 -O3 -DUSE_NEON -DUSE_PNG -pthread -g -fopenmp
lm_aarch64: FLAGS += $(INCLUDES) -std=c++14 -O3 -DUSE_NEON -DUSE_PNG -pthread -g -fopenmp -DFUNC_TIME_MEASUREMENT
lm_aarch64: CXXFLAGS +=

lm_arm: CXX = arm-linux-gnueabihf-g++
lm_arm: FLAGS += $(INCLUDES) -std=c++14 -O3 -DUSE_NEON -DUSE_PNG -DAARCH32 -mcpu=cortex-a9 -mfpu=neon -mthumb -s -pthread -g -fopenmp
lm_arm: FLAGS += $(INCLUDES) -std=c++14 -O3 -DUSE_NEON -DUSE_PNG -DAARCH32 -mcpu=cortex-a9 -mfpu=neon -mthumb -s -pthread -g -fopenmp -DFUNC_TIME_MEASUREMENT
lm_arm: CXXFLAGS +=

lm_fpga: CXX = arm-linux-gnueabihf-g++
2 changes: 1 addition & 1 deletion blueoil/converter/templates/include/dlk_test.h
@@ -23,7 +23,7 @@ limitations under the License.
#include <sstream>
#include <fstream>
#include <vector>
#include "global.h"
#include "types.h"

template <class T_SIZE, class T>
struct Diff {
1 change: 0 additions & 1 deletion blueoil/converter/templates/include/func/add.h
@@ -17,7 +17,6 @@ limitations under the License.

#include <functional>

#include "global.h"
#include "tensor_view.h"
#include "func/impl/binary_op.h"
#include "time_measurement.h"
2 changes: 1 addition & 1 deletion blueoil/converter/templates/include/func/average_pool.h
@@ -16,7 +16,7 @@ limitations under the License.
#ifndef DLK_FUNC_AVERAGE_POOL_H_INCLUDED
#define DLK_FUNC_AVERAGE_POOL_H_INCLUDED

#include "global.h"
#include "types.h"
#include "tensor_view.h"

struct avg_pooling_parameters {
blueoil/converter/templates/include/func/batch_normalization.h
@@ -16,7 +16,7 @@ limitations under the License.
#ifndef DLK_FUNC_BATCH_NORMALIZATION_H_INCLUDED
#define DLK_FUNC_BATCH_NORMALIZATION_H_INCLUDED

#include "global.h"
#include "types.h"
#include "tensor_view.h"

void func_BatchNormalizationOptimized(const TensorView<T_FLOAT, MemoryLayout::NHWC>& input,
2 changes: 1 addition & 1 deletion blueoil/converter/templates/include/func/concat_on_depth.h
@@ -19,7 +19,7 @@ limitations under the License.
#include <tuple>
#include <type_traits>

#include "global.h"
#include "types.h"
#include "tensor_view.h"
#include "time_measurement.h"

12 changes: 7 additions & 5 deletions blueoil/converter/templates/include/func/depth_to_space.h
@@ -16,7 +16,7 @@ limitations under the License.
#ifndef DLK_FUNC_DEPTH_TO_SPACE_H_INCLUDED
#define DLK_FUNC_DEPTH_TO_SPACE_H_INCLUDED

#include "global.h"
#include "types.h"
#include "time_measurement.h"
#include "tensor_view.h"

@@ -48,8 +48,9 @@ inline void func_DepthToSpace(const TensorView<float, MemoryLayout::NHWC>& input
Measurement::Stop();
}

inline void func_DepthToSpace(const TensorView<QUANTIZED_PACKED, MemoryLayout::HWChBCl>& input,
const TensorView<QUANTIZED_PACKED, MemoryLayout::HWChBCl>& output,
template <typename T>
void func_DepthToSpace(const TensorView<QuantizedPacked<T>, MemoryLayout::HWChBCl>& input,
const TensorView<QuantizedPacked<T>, MemoryLayout::HWChBCl>& output,
T_UINT a, T_UINT b, T_UINT kernel_size, T_UINT stride) {
Measurement::Start("DepthToSpace");

@@ -79,8 +80,9 @@ inline void func_DepthToSpace(const TensorView<QUANTIZED_PACKED, MemoryLayout::H
Measurement::Stop();
}

inline void func_DepthToSpace(const TensorView<QUANTIZED_PACKED, MemoryLayout::ChHWBCl>& input,
const TensorView<QUANTIZED_PACKED, MemoryLayout::ChHWBCl>& output,
template <typename T>
void func_DepthToSpace(const TensorView<QuantizedPacked<T>, MemoryLayout::ChHWBCl>& input,
const TensorView<QuantizedPacked<T>, MemoryLayout::ChHWBCl>& output,
T_UINT a, T_UINT b, T_UINT kernel_size, T_UINT stride) {
Measurement::Start("DepthToSpace");

1 change: 0 additions & 1 deletion blueoil/converter/templates/include/func/leaky_relu.h
@@ -16,7 +16,6 @@ limitations under the License.
#ifndef DLK_FUNC_LEAKY_RELU_H_INCLUDED
#define DLK_FUNC_LEAKY_RELU_H_INCLUDED

#include "global.h"
#include "tensor_view.h"
#include "func/impl/unary_op.h"

2 changes: 1 addition & 1 deletion blueoil/converter/templates/include/func/matmul.h
@@ -16,7 +16,7 @@ limitations under the License.
#ifndef DLK_FUNC_MATMUL_H_INCLUDED
#define DLK_FUNC_MATMUL_H_INCLUDED

#include "global.h"
#include "types.h"
#include "tensor_view.h"

void func_Matmul(const TensorView<T_FLOAT, MemoryLayout::NC>& input,
1 change: 0 additions & 1 deletion blueoil/converter/templates/include/func/max.h
@@ -16,7 +16,6 @@ limitations under the License.
#ifndef DLK_FUNC_MAX_H_INCLUDED
#define DLK_FUNC_MAX_H_INCLUDED

#include "global.h"
#include "tensor_view.h"
#include "func/impl/binary_op.h"
#include "time_measurement.h"
2 changes: 1 addition & 1 deletion blueoil/converter/templates/include/func/max_pool.h
@@ -16,7 +16,7 @@ limitations under the License.
#ifndef DLK_FUNC_MAX_POOLING_H_INCLUDED
#define DLK_FUNC_MAX_POOLING_H_INCLUDED

#include "global.h"
#include "types.h"
#include "tensor_view.h"

struct max_pooling_parameters {
1 change: 0 additions & 1 deletion blueoil/converter/templates/include/func/minimum.h
@@ -16,7 +16,6 @@ limitations under the License.
#ifndef DLK_FUNC_MINIMUM_H_INCLUDED
#define DLK_FUNC_MINIMUM_H_INCLUDED

#include "global.h"
#include "tensor_view.h"
#include "func/impl/binary_op.h"
#include "time_measurement.h"
1 change: 0 additions & 1 deletion blueoil/converter/templates/include/func/mul.h
@@ -16,7 +16,6 @@ limitations under the License.
#ifndef DLK_FUNC_MUL_H_INCLUDED
#define DLK_FUNC_MUL_H_INCLUDED

#include "global.h"
#include "tensor_view.h"
#include "func/impl/binary_op.h"
#include "time_measurement.h"
2 changes: 1 addition & 1 deletion blueoil/converter/templates/include/func/pad.h
@@ -1,7 +1,7 @@
#ifndef DLK_FUNC_PAD_H_INCLUDED
#define DLK_FUNC_PAD_H_INCLUDED

#include "global.h"
#include "types.h"
#include "tensor_view.h"

void func_Pad(const TensorView<T_FLOAT, MemoryLayout::NHWC>& input,
1 change: 0 additions & 1 deletion blueoil/converter/templates/include/func/real_div.h
@@ -16,7 +16,6 @@ limitations under the License.
#ifndef DLK_FUNC_REAL_DIV_H
#define DLK_FUNC_REAL_DIV_H

#include "global.h"
#include "tensor_view.h"
#include "func/impl/binary_op.h"
#include "time_measurement.h"
1 change: 0 additions & 1 deletion blueoil/converter/templates/include/func/relu.h
@@ -16,7 +16,6 @@ limitations under the License.
#ifndef DLK_FUNC_RELU_H_INCLUDED
#define DLK_FUNC_RELU_H_INCLUDED

#include "global.h"
#include "tensor_view.h"
#include "func/impl/unary_op.h"

blueoil/converter/templates/include/func/resize_nearest_neighbor.h
@@ -16,7 +16,7 @@ limitations under the License.
#ifndef DLK_FUNC_RESIZE_NEAREST_NEIGHBOR_H_INCLUDED
#define DLK_FUNC_RESIZE_NEAREST_NEIGHBOR_H_INCLUDED

#include "global.h"
#include "types.h"
#include "time_measurement.h"
#include "tensor_view.h"

@@ -51,8 +51,9 @@ inline void func_ResizeNearestNeighbor(const TensorView<float, MemoryLayout::NHW
}


inline void func_ResizeNearestNeighbor(const TensorView<QUANTIZED_PACKED, MemoryLayout::HWChBCl>& input,
const TensorView<QUANTIZED_PACKED, MemoryLayout::HWChBCl>& output) {
template <typename T>
void func_ResizeNearestNeighbor(const TensorView<QuantizedPacked<T>, MemoryLayout::HWChBCl>& input,
const TensorView<QuantizedPacked<T>, MemoryLayout::HWChBCl>& output) {
Measurement::Start("ResizeNearestNeighbor");

const auto in_shape = input.get_shape();
@@ -83,8 +84,9 @@ inline void func_ResizeNearestNeighbor(const TensorView<QUANTIZED_PACKED, Memory
}


inline void func_ResizeNearestNeighbor(const TensorView<QUANTIZED_PACKED, MemoryLayout::ChHWBCl>& input,
const TensorView<QUANTIZED_PACKED, MemoryLayout::ChHWBCl>& output) {
template <typename T>
void func_ResizeNearestNeighbor(const TensorView<QuantizedPacked<T>, MemoryLayout::ChHWBCl>& input,
const TensorView<QuantizedPacked<T>, MemoryLayout::ChHWBCl>& output) {
Measurement::Start("ResizeNearestNeighbor");

const auto in_shape = input.get_shape();