Skip to content

Commit

Permalink
[RUNTIME] Finish GPU runtime and python interface
Browse files Browse the repository at this point in the history
  • Loading branch information
tqchen committed Jan 17, 2017
1 parent f2f1526 commit 6ec929b
Show file tree
Hide file tree
Showing 16 changed files with 931 additions and 22 deletions.
36 changes: 33 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
export LDFLAGS = -pthread -lm
export CFLAGS = -std=c++11 -Wall -O2\
-Iinclude -Idmlc-core/include -IHalideIR/src -fPIC
# Pick the build configuration file: prefer a user-local ./config.mk
# (copied from the template so local edits stay out of git), otherwise
# fall back to the shipped template in make/config.mk.
ifndef config
ifneq ("$(wildcard ./config.mk)","")
config = config.mk
else
config = make/config.mk
endif
endif

include $(config)

# specify tensor path
.PHONY: clean all test doc
Expand All @@ -13,6 +19,30 @@ SRC = $(wildcard src/*.cc src/*/*.cc)
ALL_OBJ = $(patsubst src/%.cc, build/%.o, $(SRC))
ALL_DEP = $(ALL_OBJ) $(LIB_HALIDE_IR)

# If config.mk points at a specific CUDA toolkit, use its nvcc instead of
# whatever happens to be on PATH.
ifneq ($(USE_CUDA_PATH), NONE)
NVCC=$(USE_CUDA_PATH)/bin/nvcc
endif

export LDFLAGS = -pthread -lm
export CFLAGS = -std=c++11 -Wall -O2\
-Iinclude -Idmlc-core/include -IHalideIR/src -fPIC

# Fold in any user-supplied extra compile/link flags from config.mk.
ifneq ($(ADD_CFLAGS), NONE)
CFLAGS += $(ADD_CFLAGS)
endif

ifneq ($(ADD_LDFLAGS), NONE)
LDFLAGS += $(ADD_LDFLAGS)
endif


# Toggle CUDA runtime support and link the CUDA driver/runtime libraries.
# NOTE(review): this defines TVM_RUNTIME_CUDA, but include/tvm/base.h in this
# same commit tests TVM_CUDA_RUNTIME — the two macro spellings do not match,
# so this flag appears to have no effect there; confirm which name is intended.
ifeq ($(USE_CUDA), 1)
CFLAGS += -DTVM_RUNTIME_CUDA=1
LDFLAGS += -lcuda -lcudart
else
CFLAGS += -DTVM_RUNTIME_CUDA=0
endif

include tests/cpp/unittest.mk

test: $(TEST)
Expand Down
14 changes: 14 additions & 0 deletions include/tvm/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,20 @@

namespace tvm {

/*!
 * \brief Whether to compile with the CUDA runtime (defaults to enabled
 *  when not set on the compiler command line).
 *
 * NOTE(review): the Makefile passes -DTVM_RUNTIME_CUDA=..., which does not
 * match this macro name (TVM_CUDA_RUNTIME), so the build flag never reaches
 * this default — confirm which spelling is intended.
 */
#ifndef TVM_CUDA_RUNTIME
#define TVM_CUDA_RUNTIME 1
#endif

/*!
 * \brief Whether to compile with the OpenCL runtime (disabled by default).
 */
#ifndef TVM_OPENCL_RUNTIME
#define TVM_OPENCL_RUNTIME 0
#endif

using ::tvm::Node;
using ::tvm::NodeRef;
using ::tvm::AttrVisitor;
Expand Down
31 changes: 21 additions & 10 deletions include/tvm/c_runtime_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

TVM_EXTERN_C {
/*! \brief type of array index. */
typedef unsigned tvm_index_t;
typedef uint32_t tvm_index_t;

/*!
* \brief union type for arguments and return values
Expand Down Expand Up @@ -68,7 +68,7 @@ typedef enum {
/*! \brief NVidia GPU device(CUDA) */
kGPU = 2,
/*! \brief opencl device */
KOpenCL = 4
kOpenCL = 4
} TVMDeviceMask;

/*!
Expand All @@ -79,7 +79,7 @@ typedef struct {
int dev_mask;
/*! \brief the device id */
int dev_id;
} TVMDevice;
} TVMContext;

/*! \brief The type code in TVMDataType */
typedef enum {
Expand Down Expand Up @@ -122,8 +122,8 @@ typedef struct {
tvm_index_t ndim;
/*! \brief The data type flag */
TVMDataType dtype;
/*! \brief The device this array sits on */
TVMDevice device;
/*! \brief The device context this array sits on */
TVMContext ctx;
} TVMArray;

/*!
Expand All @@ -150,21 +150,31 @@ typedef TVMArray* TVMArrayHandle;
*/
TVM_DLL const char *TVMGetLastError(void);

/*!
* \brief Whether the specified context is enabled.
*
* \param ctx The context to be checked.
* \param out_enabled whether the ctx is enabled.
* \return Whether the function is successful.
*/
TVM_DLL int TVMContextEnabled(TVMContext ctx,
int* out_enabled);

/*!
* \brief Allocate a nd-array's memory,
* including space of shape, of given spec.
*
* \param shape The shape of the array, the data content will be copied to out
* \param ndim The number of dimension of the array.
* \param dtype The array data type.
* \param device The device this array sits on.
* \param ctx The ctx this array sits on.
* \param out The output handle.
* \return Whether the function is successful.
*/
TVM_DLL int TVMArrayAlloc(const tvm_index_t* shape,
tvm_index_t ndim,
int dtype,
TVMDevice device,
TVMDataType dtype,
TVMContext ctx,
TVMArrayHandle* out);
/*!
* \brief Free the TVM Array.
Expand All @@ -183,9 +193,10 @@ TVM_DLL int TVMArrayCopyFromTo(TVMArrayHandle from,
TVMStreamHandle stream);
/*!
* \brief Wait until all computations on stream completes.
* \param stream the stream to be synchronized.
* \param ctx The ctx to be synchronized.
* \param stream The stream to be synchronized.
*/
TVM_DLL int TVMSynchronize(TVMStreamHandle stream);
TVM_DLL int TVMSynchronize(TVMContext ctx, TVMStreamHandle stream);

/*!
* \brief Launch a generated TVM function
Expand Down
46 changes: 46 additions & 0 deletions make/config.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#-------------------------------------------------------------------------------
# Template configuration for compiling
#
# If you want to change the configuration, please use the following
# steps. Assume you are in the root directory. First copy this
# file so that any local changes will be ignored by git
#
# $ cp make/config.mk .
#
# Next modify the relevant entries, and then compile by
#
# $ make
#
# or build in parallel with 8 threads
#
# $ make -j8
#-------------------------------------------------------------------------------

#---------------------
# choice of compiler
#--------------------
export NVCC = nvcc

# whether to compile with debug flags
DEBUG = 0

# additional link flags you want to add
ADD_LDFLAGS =

# additional compile flags you want to add
ADD_CFLAGS =

#---------------------------------------------
# matrix computation libraries for CPU/GPU
#---------------------------------------------

# whether to use CUDA during compile (1 = yes, 0 = no)
USE_CUDA = 1

# path to the CUDA toolkit, used for both linking and compiling;
# if nvcc and the CUDA libraries are already in your environment, leave as NONE
# USE_CUDA_PATH = /usr/local/cuda
USE_CUDA_PATH = NONE

# whether to use NVRTC for runtime compilation of kernels written in a host
# language (e.g. Python)
USE_NVRTC = 0
5 changes: 4 additions & 1 deletion python/tvm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@
from __future__ import absolute_import as _abs
from ._ctypes._api import register_node

from . import tensor as tensor
from . import tensor
from . import expr
from . import stmt
from . import make
from . import ir_pass
from . import collections
from . import schedule

from . import ndarray as nd
from .ndarray import cpu, gpu, opencl

from ._base import TVMError
from .function import *
21 changes: 21 additions & 0 deletions python/tvm/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def check_call(ret):
if ret != 0:
raise TVMError(py_str(_LIB.TVMGetLastError()))


def c_str(string):
"""Create ctypes char * from a python string
Parameters
Expand All @@ -72,6 +73,26 @@ def c_str(string):
"""
return ctypes.c_char_p(string.encode('utf-8'))


def c_array(ctype, values):
    """Pack a Python sequence into a newly created ctypes array.

    Parameters
    ----------
    ctype : ctypes data type
        Element type of the resulting array.

    values : tuple or list
        Values to copy into the array, in order.

    Returns
    -------
    out : ctypes array
        A ctypes array of length ``len(values)`` holding the given values.
    """
    array_type = ctype * len(values)
    return array_type(*values)


def ctypes2docstring(num_args, arg_names, arg_types, arg_descs, remove_dup=True):
"""Convert ctypes returned doc string information into parameters docstring.
Expand Down
Loading

0 comments on commit 6ec929b

Please sign in to comment.