Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Windows build support #58

Merged
merged 5 commits into from
Oct 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 21 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,16 @@ include(cmake/dependencies_cuda.cmake)
include(cmake/dependencies_test.cmake)

# Set compilation flags.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-narrowing -Wno-strict-overflow")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -ftree-vectorize")
# Platform-specific compiler flags: MSVC on Windows uses /O2 and the
# CMAKE_CXX_STANDARD abstraction; gcc/clang elsewhere get explicit -W/-O flags.
if(WIN32)
# MSVC release optimization (roughly equivalent to -O2 on gcc/clang).
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2")
# Request C++11 via CMake so the correct MSVC switch is chosen automatically.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
else()
# gcc/clang: enable warnings, silence narrowing/strict-overflow noise.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-narrowing -Wno-strict-overflow")
# Aggressive optimization plus auto-vectorization for release builds.
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -ftree-vectorize")
endif()


add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0)
if (APPLE)
string(APPEND CMAKE_CXX_FLAGS " -fvisibility=hidden")
Expand All @@ -52,20 +60,29 @@ add_subdirectory(src/rpucuda)
include_directories(SYSTEM src/rpucuda)

add_library(RPU_CPU ${RPU_CPU_SRCS})

target_link_libraries(RPU_CPU ${RPU_DEPENDENCY_LIBS})
# On Windows the PyTorch C++ import libraries must be linked explicitly;
# they are not pulled in transitively as on Linux/macOS.
if(WIN32)
target_link_libraries(RPU_CPU c10.lib torch_cpu.lib)
endif()

set_target_properties(RPU_CPU PROPERTIES CXX_STANDARD 11
POSITION_INDEPENDENT_CODE ON)

if (USE_CUDA)
add_subdirectory(src/rpucuda/cuda)
include_directories(SYSTEM src/rpucuda/cuda)

add_library(RPU_GPU ${RPU_GPU_SRCS})

target_link_libraries(RPU_GPU RPU_CPU cublas curand ${RPU_DEPENDENCY_LIBS})
# Windows: link the CUDA-enabled torch import library explicitly for the GPU target.
if(WIN32)
target_link_libraries(RPU_GPU c10.lib torch_cuda.lib)
endif(WIN32)

set_target_properties(RPU_GPU PROPERTIES ${CUDA_TARGET_PROPERTIES})
set_property(TARGET RPU_GPU PROPERTY CUDA_ARCHITECTURES ${RPU_CUDA_ARCHITECTURES})
add_dependencies(RPU_GPU cub)
endif()
endif(USE_CUDA)

# Add aihwkit targets.
add_subdirectory(src/aihwkit/simulator)
Expand Down
4 changes: 3 additions & 1 deletion cmake/Modules/FindOpenBLAS.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ SET(Open_BLAS_INCLUDE_SEARCH_PATHS
/usr/local/include/openblas-base
/usr/local/opt/openblas/include
/opt/OpenBLAS/include
/opt/include/OpenBLAS
$ENV{OpenBLAS_HOME}
$ENV{OpenBLAS_HOME}/include
$ENV{OPENBLAS_ROOT_DIR}
Expand All @@ -42,6 +43,8 @@ SET(Open_BLAS_LIB_SEARCH_PATHS
/usr/local/lib64
/usr/local/opt/openblas/lib
/opt/OpenBLAS/lib
/opt/lib
/opt/lib/OpenBLAS
$ENV{OpenBLAS}
$ENV{OpenBLAS}/lib
$ENV{OpenBLAS_HOME}
Expand Down Expand Up @@ -85,4 +88,3 @@ MARK_AS_ADVANCED(
OpenBLAS_LIB
OpenBLAS
)

26 changes: 26 additions & 0 deletions docs/source/advanced_install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,29 @@ the minimal dependencies [#f3]_::
$ conda install cmake openblas pybind11 scikit-build
$ conda install pytorch -c pytorch


Windows (Experimental)
""""""""""""""""""""""

On a Windows-based system, we recommend installing OpenBLAS by following the
`OpenBLAS - Visual Studio`_ installation and usage guide. This requires
installing `MS Visual Studio 2019`_ and `Miniconda`_.

After compiling and installing OpenBLAS, the following commands can be used
in the same Miniconda terminal for installing the minimal dependencies::

$ conda install pybind11 scikit-build
$ conda install pytorch -c pytorch

To compile aihwkit, it is recommended to use the x64 Native Tools Command
Prompt for VS 2019.

Note: if you want to use pip instead of Conda, you can use the following commands::

$ pip install cmake scikit-build pybind11
$ pip install torch -f https://download.pytorch.org/whl/torch_stable.html


Installing and compiling
~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down Expand Up @@ -128,3 +151,6 @@ of the command will help diagnosing the issue.
.. _googletest: https://github.com/google/googletest
.. _PyTorch: https://pytorch.org
.. _OpenMP: https://openmp.llvm.org
.. _OpenBLAS - Visual Studio: https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio
.. _MS Visual Studio 2019: https://visualstudio.microsoft.com/vs/
.. _Miniconda: https://docs.conda.io/en/latest/miniconda.html
2 changes: 1 addition & 1 deletion src/aihwkit/simulator/rpu_base_src/rpu_base_tiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -805,7 +805,7 @@ void declare_rpu_tiles(py::module &m) {
return torch::empty({0});
}
torch::Tensor hidden_parameters =
torch::empty({v.size(), self.getDSize(), self.getXSize()});
torch::empty({(int)v.size(), self.getDSize(), self.getXSize()});

std::vector<T *> data_ptrs(v.size());
size_t size = self.getDSize() * self.getXSize();
Expand Down
4 changes: 2 additions & 2 deletions src/rpucuda/rpu_pulsed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,11 @@ template <typename T> void RPUPulsed<T>::initialize(PulsedMetaParameter<T> *p, i

// forward/backward pass
// one could even choose the favorate forward/backward here... (based on par)
fb_pass_ = make_unique<ForwardBackwardPassIOManaged<T>>(this->x_size_, this->d_size_, this->rng_);
fb_pass_ = RPU::make_unique<ForwardBackwardPassIOManaged<T>>(this->x_size_, this->d_size_, this->rng_);
fb_pass_->setIOPar(p->f_io, p->b_io);

// pulsed update pass
pwu_ = make_unique<PulsedRPUWeightUpdater<T>>(this->x_size_, this->d_size_, this->rng_);
pwu_ = RPU::make_unique<PulsedRPUWeightUpdater<T>>(this->x_size_, this->d_size_, this->rng_);
pwu_->setUpPar(p->up);
}

Expand Down
4 changes: 2 additions & 2 deletions src/rpucuda/rpu_transfer_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,11 +301,11 @@ void TransferRPUDevice<T>::populate(
par.initializeWithSize(this->x_size_, this->d_size_);
auto shared_rng = std::make_shared<RNG<T>>(0); // we just take a new one here (seeds...)
transfer_fb_pass_ =
make_unique<ForwardBackwardPassIOManaged<T>>(this->x_size_, this->d_size_, shared_rng);
RPU::make_unique<ForwardBackwardPassIOManaged<T>>(this->x_size_, this->d_size_, shared_rng);
transfer_fb_pass_->setIOPar(par.transfer_io, par.transfer_io);
// NOTE: the OUT_SCALE might be different for the transfer!! How to account for that?!?

transfer_pwu_ = make_unique<PulsedRPUWeightUpdater<T>>(this->x_size_, this->d_size_, shared_rng);
transfer_pwu_ = RPU::make_unique<PulsedRPUWeightUpdater<T>>(this->x_size_, this->d_size_, shared_rng);
transfer_pwu_->setUpPar(par.transfer_up);

this->reduce_weightening_.resize(this->n_devices_);
Expand Down