Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
Add BLAS3 and LAPACK routines (#6538)
Browse files Browse the repository at this point in the history
* Added linear algebra operators

* more comments about style of wrapper interface

* more appropriate fatal exit when lapack does not exist

* more comments on row/col-major ordering

* added config switch for lapack usage

* switched lapack usage off by default
  • Loading branch information
asmushetzel authored and piiswrong committed Jun 13, 2017
1 parent c43c901 commit e852036
Show file tree
Hide file tree
Showing 19 changed files with 1,602 additions and 13 deletions.
10 changes: 10 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,16 @@ if(USE_OPENMP)
endif()
endif()

# LAPACK support: defining MXNET_USE_LAPACK=1 compiles in the linalg operators.
if(USE_LAPACK)
add_definitions(-DMXNET_USE_LAPACK=1)
else(USE_LAPACK)
# Workaround for Windows until using new Jenkinsfile.
# OpenBLAS ("open") ships its own LAPACK implementation, so LAPACK support
# can be enabled even when USE_LAPACK was not requested explicitly.
if(USE_BLAS STREQUAL "open")
add_definitions(-DMXNET_USE_LAPACK=1)
endif()
endif()


if(UNIX)
find_library(RTLIB rt)
if(RTLIB)
Expand Down
4 changes: 2 additions & 2 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ USE_CPP_PACKAGE=1 \
init_git_win()
bat """mkdir build_vc14_cpu
cd build_vc14_cpu
cmake -G \"Visual Studio 14 2015 Win64\" -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_PROFILER=1 -DUSE_BLAS=open -DUSE_DIST_KVSTORE=0 ${env.WORKSPACE}"""
cmake -G \"Visual Studio 14 2015 Win64\" -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_PROFILER=1 -DUSE_BLAS=open -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 ${env.WORKSPACE}"""
bat 'C:\\mxnet\\build_vc14_cpu.bat'

bat '''rmdir /s/q pkg_vc14_gpu
Expand Down Expand Up @@ -188,7 +188,7 @@ del /Q *.7z
bat """mkdir build_vc14_gpu
call "C:\\Program Files (x86)\\Microsoft Visual Studio 14.0\\VC\\bin\\x86_amd64\\vcvarsx86_amd64.bat"
cd build_vc14_gpu
cmake -G \"NMake Makefiles JOM\" -DUSE_CUDA=1 -DUSE_CUDNN=1 -DUSE_NVRTC=1 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_PROFILER=1 -DUSE_BLAS=open -DUSE_DIST_KVSTORE=0 -DCUDA_ARCH_NAME=All -DCMAKE_CXX_FLAGS_RELEASE="/FS /MD /O2 /Ob2 /DNDEBUG" -DCMAKE_BUILD_TYPE=Release ${env.WORKSPACE}"""
cmake -G \"NMake Makefiles JOM\" -DUSE_CUDA=1 -DUSE_CUDNN=1 -DUSE_NVRTC=1 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_PROFILER=1 -DUSE_BLAS=open -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 -DCUDA_ARCH_NAME=All -DCMAKE_CXX_FLAGS_RELEASE="/FS /MD /O2 /Ob2 /DNDEBUG" -DCMAKE_BUILD_TYPE=Release ${env.WORKSPACE}"""
bat 'C:\\mxnet\\build_vc14_gpu.bat'
bat '''rmdir /s/q pkg_vc14_gpu
mkdir pkg_vc14_gpu\\lib
Expand Down
10 changes: 10 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,16 @@ else
endif
endif

# lapack settings.
ifeq ($(USE_LAPACK), 1)
# Only enable the compile-time flag for BLAS flavors known to provide
# (or coexist with) a LAPACK implementation.
ifeq ($(USE_BLAS),$(filter $(USE_BLAS),openblas apple atlas mkl))
CFLAGS += -DMXNET_USE_LAPACK
endif
# Link the standalone liblapack; "apple" is excluded here, presumably because
# the Accelerate framework already bundles LAPACK — TODO confirm.
ifeq ($(USE_BLAS),$(filter $(USE_BLAS),openblas atlas mkl))
LDFLAGS += -llapack
endif
endif

ifeq ($(USE_CUDNN), 1)
CFLAGS += -DMSHADOW_USE_CUDNN=1
LDFLAGS += -lcudnn
Expand Down
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ before_build:
set OpenCV_DIR=%APPVEYOR_BUILD_FOLDER%/%MXNET_OPENCV_DIR%/build
cmake .. -DOPENCV_DIR=%OpenCV_DIR% -DUSE_PROFILER=1 -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_BLAS=open -DUSE_DIST_KVSTORE=0 -G "Visual Studio 12 2013 Win64"
cmake .. -DOPENCV_DIR=%OpenCV_DIR% -DUSE_PROFILER=1 -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_BLAS=open -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 -G "Visual Studio 12 2013 Win64"
build_script:
- cmd: >-
Expand Down
2 changes: 1 addition & 1 deletion docs/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ MAINTAINER Mu Li <[email protected]>
# First, build MXNet binaries (ref mxnet/docker/cpu/Dockerfile)
#

RUN apt-get update && apt-get install -y build-essential git libopenblas-dev libopencv-dev
RUN apt-get update && apt-get install -y build-essential git libopenblas-dev liblapack-dev libopencv-dev
RUN git clone --recursive https://github.com/dmlc/mxnet/ && cd mxnet && \
cp make/config.mk . && \
echo "USE_BLAS=openblas" >>config.mk && \
Expand Down
15 changes: 15 additions & 0 deletions docs/api/python/symbol.md
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,21 @@ Composite multiple symbols into a new one by an operator.
argmin
```

### Linear Algebra

```eval_rst
.. autosummary::
:nosignatures:
linalg_gemm
linalg_gemm2
linalg_potrf
linalg_potri
linalg_trmm
linalg_trsm
linalg_sumlogdiag
```

### Miscellaneous

```eval_rst
Expand Down
16 changes: 8 additions & 8 deletions docs/get_started/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,9 @@ $ sudo apt-get install -y build-essential git

**Step 2** Install OpenBLAS.

*MXNet* uses [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) library for accelerated numerical computations on CPU machine. There are several flavors of BLAS libraries - [OpenBLAS](http://www.openblas.net/), [ATLAS](http://math-atlas.sourceforge.net/) and [MKL](https://software.intel.com/en-us/intel-mkl). In this step we install OpenBLAS. You can choose to install ATLAS or MKL.
*MXNet* uses [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) and [LAPACK](https://en.wikipedia.org/wiki/LAPACK) libraries for accelerated numerical computations on CPU machines. There are several flavors of BLAS/LAPACK libraries - [OpenBLAS](http://www.openblas.net/), [ATLAS](http://math-atlas.sourceforge.net/) and [MKL](https://software.intel.com/en-us/intel-mkl). In this step we install OpenBLAS. You can choose to install ATLAS or MKL.
```bash
$ sudo apt-get install -y libopenblas-dev
$ sudo apt-get install -y libopenblas-dev liblapack-dev
```

**Step 3** Install OpenCV.
Expand Down Expand Up @@ -429,9 +429,9 @@ $ sudo apt-get install -y build-essential git
```
**Step 2** Install OpenBLAS.

*MXNet* uses [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) library for accelerated numerical computations. There are several flavors of BLAS libraries - [OpenBLAS](http://www.openblas.net/), [ATLAS](http://math-atlas.sourceforge.net/) and [MKL](https://software.intel.com/en-us/intel-mkl). In this step we install OpenBLAS. You can choose to install ATLAS or MKL.
*MXNet* uses [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) and [LAPACK](https://en.wikipedia.org/wiki/LAPACK) libraries for accelerated numerical computations on CPU machines. There are several flavors of BLAS/LAPACK libraries - [OpenBLAS](http://www.openblas.net/), [ATLAS](http://math-atlas.sourceforge.net/) and [MKL](https://software.intel.com/en-us/intel-mkl). In this step we install OpenBLAS. You can choose to install ATLAS or MKL.
```bash
$ sudo apt-get install -y libopenblas-dev
$ sudo apt-get install -y libopenblas-dev liblapack-dev
```

**Step 3** Install OpenCV.
Expand Down Expand Up @@ -751,9 +751,9 @@ $ sudo apt-get install -y build-essential git

**Step 2** Install OpenBLAS.

*MXNet* uses [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) library for accelerated numerical computations on CPU machine. There are several flavors of BLAS libraries - [OpenBLAS](http://www.openblas.net/), [ATLAS](http://math-atlas.sourceforge.net/) and [MKL](https://software.intel.com/en-us/intel-mkl). In this step we install OpenBLAS. You can choose to install ATLAS or MKL.
*MXNet* uses [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) and [LAPACK](https://en.wikipedia.org/wiki/LAPACK) libraries for accelerated numerical computations on CPU machines. There are several flavors of BLAS/LAPACK libraries - [OpenBLAS](http://www.openblas.net/), [ATLAS](http://math-atlas.sourceforge.net/) and [MKL](https://software.intel.com/en-us/intel-mkl). In this step we install OpenBLAS. You can choose to install ATLAS or MKL.
```bash
$ sudo apt-get install -y libopenblas-dev
$ sudo apt-get install -y libopenblas-dev liblapack-dev
```

**Step 3** Install OpenCV.
Expand Down Expand Up @@ -823,9 +823,9 @@ $ sudo apt-get install -y build-essential git
```
**Step 2** Install OpenBLAS.

*MXNet* uses [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) library for accelerated numerical computations. There are several flavors of BLAS libraries - [OpenBLAS](http://www.openblas.net/), [ATLAS](http://math-atlas.sourceforge.net/) and [MKL](https://software.intel.com/en-us/intel-mkl). In this step we install OpenBLAS. You can choose to install ATLAS or MKL.
*MXNet* uses [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) and [LAPACK](https://en.wikipedia.org/wiki/LAPACK) libraries for accelerated numerical computations on CPU machines. There are several flavors of BLAS/LAPACK libraries - [OpenBLAS](http://www.openblas.net/), [ATLAS](http://math-atlas.sourceforge.net/) and [MKL](https://software.intel.com/en-us/intel-mkl). In this step we install OpenBLAS. You can choose to install ATLAS or MKL.
```bash
$ sudo apt-get install -y libopenblas-dev
$ sudo apt-get install -y libopenblas-dev liblapack-dev
```

**Step 3** Install OpenCV.
Expand Down
91 changes: 91 additions & 0 deletions include/mxnet/c_lapack_api.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*!
* Copyright (c) 2017 by Contributors
* \file c_lapack_api.h
* \brief Unified interface for LAPACK calls from within mxnet.
* Purpose is to hide the platform specific differences.
*/
#ifndef MXNET_C_LAPACK_API_H_
#define MXNET_C_LAPACK_API_H_

// Manually maintained list of LAPACK interfaces that can be used
// within MXNET. Conventions:
// - Interfaces must be compliant with lapacke.h in terms of signature and
// naming conventions so wrapping a function "foo" which has the
// signature
// lapack_int LAPACKE_foo(int, char, lapack_int, float* , lapack_int)
// within lapacke.h should result in a wrapper with the following signature
// int MXNET_LAPACK_foo(int, char, int, float* , int)
// Note that function signatures in lapacke.h will always have as first
// argument the storage order (row/col-major). All wrappers have to support
// that argument. The underlying fortran functions will always assume a
// column-major layout. It is the responsibility of the wrapper function
// to handle the (usual) case that it is called with data in row-major
// format, either by doing appropriate transpositions explicitly or using
// transposition options of the underlying fortran function.
// - It is ok to assume that matrices are stored in contiguous memory
// (which removes the need to do special handling for lda/ldb parameters
// and enables us to save additional matrix transpositions around
// the fortran calls).
// - It is desired to add some basic checking in the C++-wrappers in order
// to catch simple mistakes when calling these wrappers.
// - Must support compilation without lapack-package but issue runtime error in this case.

#include <dmlc/logging.h>

extern "C" {
// Fortran signatures
#define MXNET_LAPACK_FSIGNATURE1(func, dtype) \
void func##_(char* uplo, int* n, dtype* a, int* lda, int *info);

MXNET_LAPACK_FSIGNATURE1(spotrf, float)
MXNET_LAPACK_FSIGNATURE1(dpotrf, double)
MXNET_LAPACK_FSIGNATURE1(spotri, float)
MXNET_LAPACK_FSIGNATURE1(dpotri, double)
}

// Storage-order tags, numerically identical to LAPACK_ROW_MAJOR /
// LAPACK_COL_MAJOR from lapacke.h so the two interfaces stay compatible.
#define MXNET_LAPACK_ROW_MAJOR 101
#define MXNET_LAPACK_COL_MAJOR 102

// Basic sanity check that the matrix occupies contiguous memory, i.e. the
// leading dimension equals the matrix extent (see the header comment above:
// wrappers are allowed to assume contiguous storage).
#define CHECK_LAPACK_CONTIGUOUS(a, b) \
CHECK_EQ(a, b) << "non contiguous memory for array in lapack call";

// Basic sanity check that the triangle selector is one of the two legal values.
#define CHECK_LAPACK_UPLO(a) \
CHECK(a == 'U' || a == 'L') << "neither L nor U specified as triangle in lapack call";

// Selects the triangle argument for the Fortran call: when `invert` is set
// (the caller's data is row-major, i.e. the transpose of what column-major
// Fortran will see), swap 'U' <-> 'L'; otherwise pass `uplo` through.
inline char loup(char uplo, bool invert) {
  if (!invert) {
    return uplo;
  }
  return (uplo == 'U') ? 'L' : 'U';
}

#if MXNET_USE_LAPACK

// C wrapper with a LAPACKE-style signature around the Fortran potrf/potri
// routines. Performs basic argument checking, then maps the requested storage
// order onto the column-major Fortran call: a row-major matrix is the
// transpose of the same memory read column-major, so for these routines it
// suffices to flip the triangle selector (see loup above).
// Returns the LAPACK "info" value (0 on success).
#define MXNET_LAPACK_CWRAPPER1(func, dtype) \
inline int MXNET_LAPACK_##func(int matrix_layout, char uplo, int n, dtype* a, int lda ) { \
CHECK_LAPACK_CONTIGUOUS(n, lda); \
CHECK_LAPACK_UPLO(uplo); \
char o(loup(uplo, (matrix_layout == MXNET_LAPACK_ROW_MAJOR))); \
int ret(0); \
func##_(&o, &n, a, &lda, &ret); \
return ret; \
}
MXNET_LAPACK_CWRAPPER1(spotrf, float)
MXNET_LAPACK_CWRAPPER1(dpotrf, double)
MXNET_LAPACK_CWRAPPER1(spotri, float)
MXNET_LAPACK_CWRAPPER1(dpotri, double)

#else
// use pragma message instead of warning
#pragma message("Warning: lapack usage not enabled, linalg-operators will not be available." \
" Build with USE_LAPACK=1 to get lapack functionalities.")

// Define compilable stubs so the code base links without a LAPACK
// installation; calling any of them aborts with a fatal runtime error.
#define MXNET_LAPACK_CWRAPPER1(func, dtype) \
inline int MXNET_LAPACK_##func(int matrix_layout, char uplo, int n, dtype* a, int lda ) { \
LOG(FATAL) << "MXNet was built without LAPACK. Function " << #func << " is not available."; \
return 1; \
}
MXNET_LAPACK_CWRAPPER1(spotrf, float)
MXNET_LAPACK_CWRAPPER1(dpotrf, double)
MXNET_LAPACK_CWRAPPER1(spotri, float)
MXNET_LAPACK_CWRAPPER1(dpotri, double)

#endif

#endif // MXNET_C_LAPACK_API_H_
28 changes: 28 additions & 0 deletions include/mxnet/tensor_blob.h
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,34 @@ class TBlob {
return this->get_with_shape<Device, 3, DType>(
this->shape_.FlatTo3D(axis_begin, axis_end), stream);
}
/*!
 * \brief flatten the tensor to a specified number of dimensions
 *
 * If the tensor has more than \p dim dimensions, the leading dimensions
 * (index 0 upward) are collapsed into the first output dimension; if it
 * has fewer, the output is padded with leading dimensions of size 1.
 * The trailing dimensions are always preserved unchanged, so the total
 * element count is invariant.
 * \param stream the possible stream target tensor should reside on
 * \tparam Device which device the tensor is on
 * \tparam dim desired number of dimensions of returned tensor
 * \tparam DType the type of elements in the tensor
 * \return tensor after flatten
 */
template<typename Device, int dim, typename DType>
inline mshadow::Tensor<Device, dim, DType> FlatToKD(
mshadow::Stream<Device> *stream = NULL) const {
mshadow::Shape<dim> shape;
shape[0] = 1;
// Pad higher dimensions in case dim > ndim(): output dims
// [0, dim - ndim()) are set to 1 (loop is a no-op when dim <= ndim()).
for (int i = 0; i < dim - ndim(); ++i) {
shape[i] = 1;
}
// Collapse higher dimensions in case dim < ndim(): the first
// ndim() - dim + 1 input dims are multiplied into shape[0]
// (when dim == ndim() this just copies shape_[0]; no-op when dim > ndim()).
for (int i = 0; i < ndim() - dim + 1; ++i) {
shape[0] *= shape_[i];
}
// Preserve lower dimensions: copy the remaining input dims into the
// tail of the output shape, aligned at the right.
for (int i = std::max(0, ndim() - dim + 1); i < ndim(); ++i) {
shape[i - ndim() + dim] = shape_[i];
}
return this->get_with_shape<Device, dim, DType>(shape, stream);
}

private:
static DLDataType DTypeTransform(int type_flag) {
Expand Down
3 changes: 3 additions & 0 deletions make/config.mk
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ USE_OPENCV = 1
# use openmp for parallelization
USE_OPENMP = 1

# whether to use LAPACK during compilation
# only effective when compiled with BLAS versions openblas/apple/atlas/mkl
USE_LAPACK = 0

# MKL ML Library for Intel CPU/Xeon Phi
# Please refer to MKL_README.md for details
Expand Down
4 changes: 4 additions & 0 deletions make/osx.mk
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ USE_OPENCV = 1
# use openmp for parallelization
USE_OPENMP = 0

# whether use lapack during compilation
# only effective when compiled with blas versions openblas/apple/atlas/mkl
USE_LAPACK = 0

# choose the version of blas you want to use
# can be: mkl, blas, atlas, openblas
USE_BLAS = apple
Expand Down
5 changes: 5 additions & 0 deletions make/pip_linux_cpu.mk
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ ADD_CFLAGS += -Ldeps/lib -Ideps/include
# matrix computation libraries for CPU/GPU
#---------------------------------------------

# whether to use LAPACK during compilation
# only effective when compiled with BLAS versions openblas/apple/atlas/mkl
# if you disable it, the linalg operators will not be available
USE_LAPACK = 0

# choose the version of blas you want to use
# can be: mkl, blas, atlas, openblas
# in default use atlas for linux while apple for osx
Expand Down
2 changes: 2 additions & 0 deletions make/readthedocs.mk
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ USE_OPENMP = 0
# can be: mkl, blas, atlas, openblas
USE_STATIC_MKL = NONE
USE_BLAS = NONE
USE_LAPACK = 0

#
# add path to intel library, you may need it
# for MKL, if you did not add the path to environment variable
Expand Down
17 changes: 17 additions & 0 deletions src/operator/elemwise_op_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,23 @@ struct ElemwiseGradUseOut {
}
};

// Builds the FGradient head list from the output gradients followed by all
// of the node's inputs and then all of its outputs, and forwards it to
// MakeGradNode together with the node's attribute dictionary.
struct ElemwiseGradUseInOut {
  const char *op_name;
  std::vector<nnvm::NodeEntry> operator()(const nnvm::NodePtr& n,
                                          const std::vector<nnvm::NodeEntry>& ograds) {
    // Order matters: gradients first, then inputs, then outputs.
    std::vector<nnvm::NodeEntry> heads(ograds.begin(), ograds.end());
    heads.insert(heads.end(), n->inputs.begin(), n->inputs.end());
    const index_t num_outputs = n->num_outputs();
    for (index_t out_idx = 0; out_idx < num_outputs; ++out_idx) {
      heads.emplace_back(nnvm::NodeEntry{n, out_idx, 0});
    }
    return MakeGradNode(op_name, n, heads, n->attrs.dict);
  }
};

// Transfer only gradient to FGradient function
struct ElemwiseGradUseNone {
const char *op_name;
Expand Down
Loading

0 comments on commit e852036

Please sign in to comment.