Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Windows build support #58

Merged
merged 5 commits into from
Oct 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 21 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,16 @@ include(cmake/dependencies_cuda.cmake)
include(cmake/dependencies_test.cmake)

# Set compilation flags.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-narrowing -Wno-strict-overflow")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -ftree-vectorize")
# Platform-specific compiler flags: MSVC on Windows uses /O2 and the
# CMAKE_CXX_STANDARD abstraction; gcc/clang elsewhere get explicit -W/-O flags.
if(WIN32)
# MSVC release optimization (roughly equivalent to -O2 on gcc/clang).
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2")
# Request C++11 via CMake so the correct MSVC switch is chosen automatically.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
else()
# gcc/clang: enable warnings, silence narrowing/strict-overflow noise.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-narrowing -Wno-strict-overflow")
# Aggressive optimization plus auto-vectorization for release builds.
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -ftree-vectorize")
endif()


add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0)
if (APPLE)
string(APPEND CMAKE_CXX_FLAGS " -fvisibility=hidden")
Expand All @@ -52,20 +60,29 @@ add_subdirectory(src/rpucuda)
include_directories(SYSTEM src/rpucuda)

add_library(RPU_CPU ${RPU_CPU_SRCS})

target_link_libraries(RPU_CPU ${RPU_DEPENDENCY_LIBS})
# On Windows the PyTorch C++ import libraries must be linked explicitly;
# they are not pulled in transitively as on Linux/macOS.
if(WIN32)
target_link_libraries(RPU_CPU c10.lib torch_cpu.lib)
endif()

set_target_properties(RPU_CPU PROPERTIES CXX_STANDARD 11
POSITION_INDEPENDENT_CODE ON)

if (USE_CUDA)
add_subdirectory(src/rpucuda/cuda)
include_directories(SYSTEM src/rpucuda/cuda)

add_library(RPU_GPU ${RPU_GPU_SRCS})

target_link_libraries(RPU_GPU RPU_CPU cublas curand ${RPU_DEPENDENCY_LIBS})
# Windows: link the CUDA-enabled torch import library explicitly for the GPU target.
if(WIN32)
target_link_libraries(RPU_GPU c10.lib torch_cuda.lib)
endif(WIN32)

set_target_properties(RPU_GPU PROPERTIES ${CUDA_TARGET_PROPERTIES})
set_property(TARGET RPU_GPU PROPERTY CUDA_ARCHITECTURES ${RPU_CUDA_ARCHITECTURES})
add_dependencies(RPU_GPU cub)
endif()
endif(USE_CUDA)

# Add aihwkit targets.
add_subdirectory(src/aihwkit/simulator)
Expand Down
4 changes: 3 additions & 1 deletion cmake/Modules/FindOpenBLAS.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ SET(Open_BLAS_INCLUDE_SEARCH_PATHS
/usr/local/include/openblas-base
/usr/local/opt/openblas/include
/opt/OpenBLAS/include
/opt/include/OpenBLAS
$ENV{OpenBLAS_HOME}
$ENV{OpenBLAS_HOME}/include
$ENV{OPENBLAS_ROOT_DIR}
Expand All @@ -42,6 +43,8 @@ SET(Open_BLAS_LIB_SEARCH_PATHS
/usr/local/lib64
/usr/local/opt/openblas/lib
/opt/OpenBLAS/lib
/opt/lib
/opt/lib/OpenBLAS
$ENV{OpenBLAS}
$ENV{OpenBLAS}/lib
$ENV{OpenBLAS_HOME}
Expand Down Expand Up @@ -85,4 +88,3 @@ MARK_AS_ADVANCED(
OpenBLAS_LIB
OpenBLAS
)

26 changes: 26 additions & 0 deletions docs/source/advanced_install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,29 @@ the minimal dependencies [#f3]_::
$ conda install cmake openblas pybind11 scikit-build
$ conda install pytorch -c pytorch


Windows (Experimental)
""""""""""""""""""""""

On a Windows-based system, we recommend installing OpenBLAS by following the
`OpenBLAS - Visual Studio`_ installation and usage guide. This requires
installing `MS Visual Studio 2019`_ and `Miniconda`_.

After compiling and installing OpenBLAS, the following commands can be used
in the same Miniconda terminal for installing the minimal dependencies::

$ conda install pybind11 scikit-build
$ conda install pytorch -c pytorch

To compile aihwkit, it is recommended to use the x64 Native Tools Command
Prompt for VS 2019.

Note: if you want to use pip instead of Conda, you can use the following commands::

$ pip install cmake scikit-build pybind11
$ pip install torch -f https://download.pytorch.org/whl/torch_stable.html


Installing and compiling
~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down Expand Up @@ -128,3 +151,6 @@ of the command will help diagnosing the issue.
.. _googletest: https://github.com/google/googletest
.. _PyTorch: https://pytorch.org
.. _OpenMP: https://openmp.llvm.org
.. _OpenBLAS - Visual Studio: https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio
.. _MS Visual Studio 2019: https://visualstudio.microsoft.com/vs/
.. _Miniconda: https://docs.conda.io/en/latest/miniconda.html
2 changes: 1 addition & 1 deletion src/aihwkit/simulator/rpu_base_src/rpu_base_tiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -805,7 +805,7 @@ void declare_rpu_tiles(py::module &m) {
return torch::empty({0});
}
torch::Tensor hidden_parameters =
torch::empty({v.size(), self.getDSize(), self.getXSize()});
torch::empty({(int)v.size(), self.getDSize(), self.getXSize()});

std::vector<T *> data_ptrs(v.size());
size_t size = self.getDSize() * self.getXSize();
Expand Down
4 changes: 2 additions & 2 deletions src/rpucuda/rpu_pulsed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,11 @@ template <typename T> void RPUPulsed<T>::initialize(PulsedMetaParameter<T> *p, i

// forward/backward pass
// one could even choose the favorate forward/backward here... (based on par)
fb_pass_ = make_unique<ForwardBackwardPassIOManaged<T>>(this->x_size_, this->d_size_, this->rng_);
fb_pass_ = RPU::make_unique<ForwardBackwardPassIOManaged<T>>(this->x_size_, this->d_size_, this->rng_);
fb_pass_->setIOPar(p->f_io, p->b_io);

// pulsed update pass
pwu_ = make_unique<PulsedRPUWeightUpdater<T>>(this->x_size_, this->d_size_, this->rng_);
pwu_ = RPU::make_unique<PulsedRPUWeightUpdater<T>>(this->x_size_, this->d_size_, this->rng_);
pwu_->setUpPar(p->up);
}

Expand Down
4 changes: 2 additions & 2 deletions src/rpucuda/rpu_transfer_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,11 +301,11 @@ void TransferRPUDevice<T>::populate(
par.initializeWithSize(this->x_size_, this->d_size_);
auto shared_rng = std::make_shared<RNG<T>>(0); // we just take a new one here (seeds...)
transfer_fb_pass_ =
make_unique<ForwardBackwardPassIOManaged<T>>(this->x_size_, this->d_size_, shared_rng);
RPU::make_unique<ForwardBackwardPassIOManaged<T>>(this->x_size_, this->d_size_, shared_rng);
transfer_fb_pass_->setIOPar(par.transfer_io, par.transfer_io);
// NOTE: the OUT_SCALE might be different for the transfer!! How to account for that?!?

transfer_pwu_ = make_unique<PulsedRPUWeightUpdater<T>>(this->x_size_, this->d_size_, shared_rng);
transfer_pwu_ = RPU::make_unique<PulsedRPUWeightUpdater<T>>(this->x_size_, this->d_size_, shared_rng);
transfer_pwu_->setUpPar(par.transfer_up);

this->reduce_weightening_.resize(this->n_devices_);
Expand Down