arm_compute v17.12

ARM-software · Dec 14, 2017 · 48bc34e · 48bc34e
1 parent 8a3da6f
commit 48bc34e
Show file tree

Hide file tree

Showing 10,812 changed files with 535,620 additions and 332,812 deletions.
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2017 ARM Software
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -9,6 +9,7 @@ Related projects:
 
 Documentation available here:
 
+- [v17.12](https://arm-software.github.io/ComputeLibrary/v17.12/)
 - [v17.10](https://arm-software.github.io/ComputeLibrary/v17.10/)
 - [v17.09](https://arm-software.github.io/ComputeLibrary/v17.09/)
 - [v17.06](https://arm-software.github.io/ComputeLibrary/v17.06/)
@@ -18,6 +19,7 @@ Documentation available here:
 
 Binaries available here:
 
+- [v17.12](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.12/arm_compute-v17.12-bin.tar.gz)
 - [v17.10](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.10/arm_compute-v17.10-bin.tar.gz)
 - [v17.09](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.09/arm_compute-v17.09-bin.tar.gz)
 - [v17.06](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.06/arm_compute-v17.06-bin.tar.gz)

diff --git a/SConscript b/SConscript
@@ -24,8 +24,8 @@ import os.path
 import re
 import subprocess
 
-VERSION = "v17.10"
-SONAME_VERSION="5.0.0"
+VERSION = "v17.12"
+SONAME_VERSION="6.0.0"
 
 Import('env')
 Import('vars')
@@ -125,6 +125,8 @@ def create_version_file(target, source, env):
 
 
 arm_compute_env = env.Clone()
+# Don't allow undefined references in the libraries:
+arm_compute_env.Append(LINKFLAGS=['-Wl,--no-undefined','-Wl,--no-allow-shlib-undefined'])
 
 generate_embed = [ arm_compute_env.Command("src/core/arm_compute_version.embed", "", action=create_version_file) ]
 arm_compute_env.Append(CPPPATH =[Dir("./src/core/").path] )
@@ -137,8 +139,12 @@ arm_compute_env.Append(LIBS = ['dl'])
 core_files = Glob('src/core/*.cpp')
 core_files += Glob('src/core/CPP/*.cpp')
 core_files += Glob('src/core/CPP/kernels/*.cpp')
+core_files += Glob('src/core/utils/*/*.cpp')
 
 runtime_files = Glob('src/runtime/*.cpp')
+runtime_files += Glob('src/runtime/CPP/ICPPSimpleFunction.cpp')
+runtime_files += Glob('src/runtime/CPP/functions/*.cpp')
+
 # CLHarrisCorners uses the Scheduler to run CPP kernels
 runtime_files += Glob('src/runtime/CPP/SingleThreadScheduler.cpp')
 
@@ -178,30 +184,46 @@ if env['neon']:
     runtime_files += Glob('src/runtime/NEON/*.cpp')
     runtime_files += Glob('src/runtime/NEON/functions/*.cpp')
 
-static_core_objects = [arm_compute_env.StaticObject(f) for f in core_files]
-shared_core_objects = [arm_compute_env.SharedObject(f) for f in core_files]
+if env['gles_compute']:
+    if env['os'] != 'android':
+        arm_compute_env.Append(CPPPATH = ["#opengles-3.1/include", "#opengles-3.1/mali_include"])
+
+    core_files += Glob('src/core/GLES_COMPUTE/*.cpp')
+    core_files += Glob('src/core/GLES_COMPUTE/kernels/*.cpp')
+
+    runtime_files += Glob('src/runtime/GLES_COMPUTE/*.cpp')
+    runtime_files += Glob('src/runtime/GLES_COMPUTE/functions/*.cpp')
 
-arm_compute_core_a = build_library('arm_compute_core-static', static_core_objects, static=True)
+    # Generate embed files
+    if env['embed_kernels']:
+        cs_files = Glob('src/core/GLES_COMPUTE/cs_shaders/*.cs')
+        cs_files += Glob('src/core/GLES_COMPUTE/cs_shaders/*.h')
+
+        embed_files = [ f.get_path()+"embed" for f in cs_files ]
+        arm_compute_env.Append(CPPPATH =[Dir("./src/core/GLES_COMPUTE/").path] )
+
+        generate_embed.append(arm_compute_env.Command(embed_files, cs_files, action=resolve_includes))
+
+arm_compute_core_a = build_library('arm_compute_core-static', core_files, static=True)
 Export('arm_compute_core_a')
 
 if env['os'] != 'bare_metal' and not env['standalone']:
-    arm_compute_core_so = build_library('arm_compute_core', shared_core_objects, static=False)
+    arm_compute_core_so = build_library('arm_compute_core', core_files, static=False)
     Export('arm_compute_core_so')
 
-shared_runtime_objects = [arm_compute_env.SharedObject(f) for f in runtime_files]
-static_runtime_objects = [arm_compute_env.StaticObject(f) for f in runtime_files]
-
-arm_compute_a = build_library('arm_compute-static', static_runtime_objects, static=True, libs = [ arm_compute_core_a ])
+arm_compute_a = build_library('arm_compute-static', runtime_files, static=True, libs = [ arm_compute_core_a ])
 Export('arm_compute_a')
 
 if env['os'] != 'bare_metal' and not env['standalone']:
-    arm_compute_so = build_library('arm_compute', shared_runtime_objects, static=False, libs = [ "arm_compute_core" ])
+    arm_compute_so = build_library('arm_compute', runtime_files, static=False, libs = [ "arm_compute_core" ])
     Depends(arm_compute_so, arm_compute_core_so)
     Export('arm_compute_so')
 
 if env['neon'] and env['opencl']:
+    Import('opencl')
     graph_files = Glob('src/graph/*.cpp')
     graph_files += Glob('src/graph/nodes/*.cpp')
+    graph_files += Glob('src/graph/operations/*.cpp')
 
     graph_files += Glob('src/graph/CL/*.cpp')
     graph_files += Glob('src/graph/NEON/*.cpp')
@@ -212,8 +234,10 @@ if env['neon'] and env['opencl']:
     arm_compute_graph_a = build_library('arm_compute_graph-static', static_graph_objects, static=True, libs = [ arm_compute_a ])
     Export('arm_compute_graph_a')
 
-    arm_compute_graph_so = build_library('arm_compute_graph', shared_graph_objects, static=False, libs = [ "arm_compute", "arm_compute_core" ])
-    Depends( arm_compute_graph_so, arm_compute_so)
+    arm_compute_env.Append(LIBPATH = ["#build/%s/opencl-1.2-stubs" % env['build_dir']])
+    arm_compute_graph_so = build_library('arm_compute_graph', shared_graph_objects, static=False, libs = [ "arm_compute", "arm_compute_core", "OpenCL" ])
+    Depends(arm_compute_graph_so, arm_compute_so)
+    Depends(arm_compute_graph_so, opencl)
     Export('arm_compute_graph_so')
 
     graph_alias = arm_compute_env.Alias("arm_compute_graph", [arm_compute_graph_a, arm_compute_graph_so])

diff --git a/SConstruct b/SConstruct
@@ -39,6 +39,7 @@ vars = Variables("scons")
 vars.AddVariables(
     BoolVariable("debug", "Debug", False),
     BoolVariable("asserts", "Enable asserts (this flag is forced to 1 for debug=1)", False),
+    BoolVariable("logging", "Logging (this flag is forced to 1 for debug=1)", False),
     EnumVariable("arch", "Target Architecture", "armv7a", allowed_values=("armv7a", "arm64-v8a", "arm64-v8.2-a", "x86_32", "x86_64")),
     EnumVariable("os", "Target OS", "linux", allowed_values=("linux", "android", "bare_metal")),
     EnumVariable("build", "Build type", "cross_compile", allowed_values=("native", "cross_compile")),
@@ -47,7 +48,8 @@ vars.AddVariables(
     BoolVariable("standalone", "Builds the tests as standalone executables, links statically with libgcc, libstdc++ and libarm_compute", False),
     BoolVariable("opencl", "Enable OpenCL support", True),
     BoolVariable("neon", "Enable Neon support", False),
-    BoolVariable("embed_kernels", "Embed OpenCL kernels in library binary", False),
+    BoolVariable("gles_compute", "Enable OpenGL ES Compute Shader support", False),
+    BoolVariable("embed_kernels", "Embed OpenCL kernels and OpenGL ES compute shaders in library binary", False),
     BoolVariable("set_soname", "Set the library's soname and shlibversion (requires SCons 2.4 or above)", False),
     BoolVariable("openmp", "Enable OpenMP backend", False),
     BoolVariable("cppthreads", "Enable C++11 threads backend", True),
@@ -81,6 +83,7 @@ env.Append(CXXFLAGS = ['-Wno-deprecated-declarations','-Wall','-DARCH_ARM',
          '-Winit-self','-Wstrict-overflow=2','-Wswitch-default',
          '-fpermissive','-std=gnu++11','-Wno-vla','-Woverloaded-virtual',
          '-Wctor-dtor-privacy','-Wsign-promo','-Weffc++','-Wno-format-nonliteral','-Wno-overlength-strings','-Wno-strict-overflow'])
+
 env.Append(CPPDEFINES = ['_GLIBCXX_USE_NANOSLEEP'])
 
 if os.environ.get('CXX', 'g++') == 'clang++':
@@ -115,16 +118,19 @@ if env['arch'] == 'armv7a':
         env.Append(CXXFLAGS = ['-mfloat-abi=softfp'])
 elif env['arch'] == 'arm64-v8a':
     env.Append(CXXFLAGS = ['-march=armv8-a'])
-
+    env.Append(CPPDEFINES = ['ARM_COMPUTE_AARCH64_V8A'])
     if env['os'] == 'linux':
         prefix = "aarch64-linux-gnu-"
     elif env['os'] == 'bare_metal':
         prefix = "aarch64-elf-"
     elif env['os'] == 'android':
         prefix = "aarch64-linux-android-"
 elif env['arch'] == 'arm64-v8.2-a':
-    env.Append(CXXFLAGS = ['-march=armv8.2-a+fp16+simd'])
-    env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FP16'])
+    env.Append(CPPDEFINES = ['ARM_COMPUTE_AARCH64_V8_2'])
+
+    if os.environ.get('CXX', 'g++') == 'clang++':
+        env.Append(CXXFLAGS = ['-fno-integrated-as'])
+
     if env['os'] == 'linux':
         prefix = "aarch64-linux-gnu-"
     elif env['os'] == 'bare_metal':
@@ -172,6 +178,8 @@ if not GetOption("help"):
 if env['standalone']:
     env.Append(CXXFLAGS = ['-fPIC'])
     env.Append(LINKFLAGS = ['-static-libgcc','-static-libstdc++'])
+    if env['cppthreads']:
+        env.Append(LINKFLAGS = ['-lpthread'])
 
 if env['Werror']:
     env.Append(CXXFLAGS = ['-Werror'])
@@ -187,15 +195,17 @@ elif env['os'] == 'bare_metal':
     env.Append(CPPDEFINES = ['BARE_METAL'])
 
 if env['opencl']:
-    if env['os'] == 'bare_metal':
+    if env['os'] in ['bare_metal'] or env['standalone']:
         print("Cannot link OpenCL statically, which is required on bare metal")
         Exit(1)
 
+if env['opencl'] or env['gles_compute']:
     if env['embed_kernels']:
         env.Append(CPPDEFINES = ['EMBEDDED_KERNELS'])
 
 if env['debug']:
     env['asserts'] = True
+    env['logging'] = True
     env.Append(CXXFLAGS = ['-O0','-g','-gdwarf-2'])
     env.Append(CPPDEFINES = ['ARM_COMPUTE_DEBUG_ENABLED'])
 else:
@@ -205,18 +215,26 @@ if env['asserts']:
     env.Append(CPPDEFINES = ['ARM_COMPUTE_ASSERTS_ENABLED'])
     env.Append(CXXFLAGS = ['-fstack-protector-strong'])
 
+if env['logging']:
+    env.Append(CPPDEFINES = ['ARM_COMPUTE_LOGGING_ENABLED'])
+
 env.Append(CPPPATH = ['#/include', "#"])
 env.Append(CXXFLAGS = env['extra_cxx_flags'])
 
 Export('vars')
 Export('env')
 Export('version_at_least')
 
-SConscript('./SConscript', variant_dir='#build/%s' % env['build_dir'], duplicate=0)
-
 if env['opencl']:
     SConscript("./opencl-1.2-stubs/SConscript", variant_dir="build/%s/opencl-1.2-stubs" % env['build_dir'], duplicate=0)
 
+if env['gles_compute'] and env['os'] != 'android':
+    env.Append(CPPPATH = ['#/include/linux'])
+    env.Append(LIBPATH = ["#build/%s/opengles-3.1-stubs" % env['build_dir']])
+    SConscript("./opengles-3.1-stubs/SConscript", variant_dir="build/%s/opengles-3.1-stubs" % env['build_dir'], duplicate=0)
+
+SConscript('./SConscript', variant_dir='#build/%s' % env['build_dir'], duplicate=0)
+
 if env['examples'] and env['os'] != 'bare_metal':
     SConscript('./examples/SConscript', variant_dir='#build/%s/examples' % env['build_dir'], duplicate=0)
 

diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
@@ -43,7 +43,7 @@ struct enable_bitwise_ops<arm_compute::GPUTarget>
 };
 
 /** Max vector width of an OpenCL vector */
-static constexpr const unsigned int max_cl_vector_width = 16;
+static constexpr unsigned int max_cl_vector_width = 16;
 
 /** Translates a tensor data type to the appropriate OpenCL type.
  *
@@ -126,6 +126,13 @@ GPUTarget get_arch_from_target(GPUTarget target);
  * @return the highest OpenCL version supported
  */
 CLVersion get_cl_version(const cl::Device &device);
+/** Helper function to check whether the cl_khr_fp16 extension is supported
+ *
+ * @param[in] device A CL device
+ *
+ * @return True if the extension is supported
+ */
+bool fp16_support(const cl::Device &device);
 /** Helper function to check whether the arm_non_uniform_work_group_size extension is supported
  *
  * @param[in] device A CL device

diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h
@@ -33,6 +33,52 @@
 
 namespace arm_compute
 {
+/** Build options */
+class CLBuildOptions
+{
+    using StringSet = std::set<std::string>;
+
+public:
+    /** Default constructor. */
+    CLBuildOptions();
+    /** Adds option to the existing build option list
+     *
+     * @param[in] option Option to add
+     */
+    void add_option(std::string option);
+    /** Adds option if a given condition is true
+     *
+     * @param[in] cond   Condition to check
+     * @param[in] option Option to add if condition is true
+     */
+    void add_option_if(bool cond, std::string option);
+    /** Adds first option if condition is true else the second one
+     *
+     * @param[in] cond         Condition to check
+     * @param[in] option_true  Option to add if condition is true
+     * @param[in] option_false Option to add if condition is false
+     */
+    void add_option_if_else(bool cond, std::string option_true, std::string option_false);
+    /** Appends given build options to the current object's options.
+     *
+     * @param[in] options Build options to append
+     */
+    void add_options(const StringSet &options);
+    /** Appends given build options to the current object's options if a given condition is true.
+     *
+     * @param[in] cond    Condition to check
+     * @param[in] options Options to add if condition is true
+     */
+    void add_options_if(bool cond, const StringSet &options);
+    /** Gets the current options list set
+     *
+     * @return Build options set
+     */
+    const StringSet &options() const;
+
+private:
+    StringSet _build_opts; /**< Build options set */
+};
 /** Program class */
 class Program
 {
@@ -181,8 +227,8 @@ class CLKernelLibrary
         return _kernel_path;
     };
     /** Gets the source of the selected program
-      *
-      * @param[in] program_name Program name.
+     *
+     * @param[in] program_name Program name.
      */
     std::string get_program_source(const std::string &program_name);
     /** Sets the CL context used to create programs.

diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h
@@ -42,9 +42,9 @@
 #include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
 #include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
 #include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthConvertKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h"
 #include "arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h"
 #include "arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h"
 #include "arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h"
@@ -58,6 +58,10 @@
 #include "arm_compute/core/CL/kernels/CLFloorKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
@@ -72,7 +76,7 @@
 #include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
 #include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
 #include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
-#include "arm_compute/core/CL/kernels/CLL2NormalizeKernel.h"
+#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
 #include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
 #include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
 #include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"

diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h
@@ -180,6 +180,13 @@ class ICLKernel : public IKernel
      * @return The maximum workgroup size value.
      */
     size_t get_max_workgroup_size();
+    /** Get the global work size given an execution window
+     *
+     * @param[in] window Execution window
+     *
+     * @return Global work size of the given execution window
+     */
+    static cl::NDRange gws_from_window(const Window &window);
 
 private:
     /** Add the passed array's parameters to the object's kernel's arguments starting from the index idx.

diff --git a/arm_compute/core/CL/ICLMultiHOG.h b/arm_compute/core/CL/ICLMultiHOG.h
@@ -35,14 +35,14 @@ class ICLMultiHOG : public IMultiHOG
 public:
     /** Return a pointer to the requested OpenCL HOG model
      *
-     *  @param[in] index The index of the wanted OpenCL HOG model.
+     * @param[in] index The index of the wanted OpenCL HOG model.
      *
      *  @return A pointer pointed to the HOG model
      */
     virtual ICLHOG *cl_model(size_t index) = 0;
     /** Return a constant pointer to the requested OpenCL HOG model
      *
-     *  @param[in] index The index of the wanted OpenCL HOG model.
+     * @param[in] index The index of the wanted OpenCL HOG model.
      *
      *  @return A constant pointer pointed to the OpenCL HOG model
      */