Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
change windows build system.
Browse files Browse the repository at this point in the history
add gen_warp cpp version
add add_custom_command to run warp_gen
add download cmake
add option
change option
add dynamic read mxnet dll
  • Loading branch information
yajiedesign committed Dec 30, 2019
1 parent 230ceee commit f366d9e
Show file tree
Hide file tree
Showing 4 changed files with 451 additions and 21 deletions.
93 changes: 72 additions & 21 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ option(USE_ASAN "Enable Clang/GCC ASAN sanitizers." OFF)
option(ENABLE_TESTCOVERAGE "Enable compilation with test coverage metric output" OFF)
option(USE_INT64_TENSOR_SIZE "Use int64_t to represent the total number of elements in a tensor" OFF)
option(BUILD_CYTHON_MODULES "Build cython modules." OFF)
cmake_dependent_option(USE_SPLIT_ARCH_DLL "Build a separate DLL for each Cuda arch (Windows only)." ON "MSVC" OFF)


message(STATUS "CMAKE_CROSSCOMPILING ${CMAKE_CROSSCOMPILING}")
message(STATUS "CMAKE_HOST_SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR}")
Expand Down Expand Up @@ -100,6 +102,7 @@ endif()

if(MSVC)
set(SYSTEM_ARCHITECTURE x86_64)
enable_language(ASM_MASM)
else()
execute_process(COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE SYSTEM_ARCHITECTURE)
endif()
Expand Down Expand Up @@ -192,9 +195,11 @@ else()
add_definitions(-DDMLC_USE_CXX11=1)
add_definitions(-DDMLC_USE_CXX14=1)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
set(CMAKE_CUDA_STANDARD 14)
elseif(SUPPORT_CXX11)
add_definitions(-DDMLC_USE_CXX11=1)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
set(CMAKE_CUDA_STANDARD 11)
elseif(SUPPORT_CXX0X)
add_definitions(-DDMLC_USE_CXX11=1)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
Expand Down Expand Up @@ -678,30 +683,66 @@ if(UNIX)
target_link_libraries(mxnet_static PUBLIC ${CMAKE_DL_LIBS})
target_compile_options(sample_lib PUBLIC -shared)
set_target_properties(mxnet_static PROPERTIES OUTPUT_NAME mxnet)
else()
add_library(mxnet SHARED ${SOURCE})
elseif(MSVC)
target_compile_options(sample_lib PUBLIC /LD)
set_target_properties(sample_lib PROPERTIES PREFIX "lib")
endif()

if(USE_CUDA AND MSVC)
target_compile_options(mxnet PUBLIC "$<$<CONFIG:DEBUG>:-Xcompiler=-MTd -Gy>")
target_compile_options(mxnet PUBLIC "$<$<CONFIG:RELEASE>:-Xcompiler=-MT -Gy>")
# Create the `mxnet` shared library target.
# Only the CUDA + MSVC case (when FIRST_CUDA is set) gets special handling; every
# other path simply builds `mxnet` from ${SOURCE}.
# NOTE(review): FIRST_CUDA, cuda_arch and NVCC_FLAGS_ARCH are defined outside this
# section - confirm their contents before changing the logic below.
if(USE_CUDA)
if(FIRST_CUDA AND MSVC)
if(USE_SPLIT_ARCH_DLL)
# Split mode: `mxnet` itself is built only from warp_dll.cpp plus the two files
# generated by gen_warp; the full ${SOURCE} list is compiled once per CUDA
# architecture into the mxnet_<arch> libraries created in the loop below.
add_executable(gen_warp tools/windowsbuild/gen_warp.cpp)
add_library(mxnet SHARED tools/windowsbuild/warp_dll.cpp ${CMAKE_BINARY_DIR}/warp_gen_cpp.cpp
${CMAKE_BINARY_DIR}/warp_gen.asm)
target_link_libraries(mxnet PRIVATE cudart Shlwapi)
# Remember the first entry of cuda_arch; its DLL is the input given to gen_warp.
list(GET cuda_arch 0 mxnet_first_arch)
foreach(arch ${cuda_arch})
# One shared library per architecture, each compiled from the complete source list.
add_library(mxnet_${arch} SHARED ${SOURCE})
# For CUDA sources only: target exactly this architecture.
target_compile_options(
mxnet_${arch}
PRIVATE
"$<$<COMPILE_LANGUAGE:CUDA>:--gpu-architecture=compute_${arch}>"
)
target_compile_options(
mxnet_${arch}
PRIVATE
"$<$<COMPILE_LANGUAGE:CUDA>:--gpu-code=sm_${arch},compute_${arch}>"
)
# For CUDA sources only: pass per-configuration flags through -Xcompiler
# (-MTd for Debug, -MT for Release, both with -Gy /bigobj).
target_compile_options(
mxnet_${arch}
PRIVATE "$<$<AND:$<CONFIG:DEBUG>,$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=-MTd -Gy /bigobj>")
target_compile_options(
mxnet_${arch}
PRIVATE "$<$<AND:$<CONFIG:RELEASE>,$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=-MT -Gy /bigobj>")
endforeach()

# Run gen_warp on the first per-arch DLL, with the build directory as working
# directory, to produce warp_gen_cpp.cpp and warp_gen.asm - the two generated
# sources listed in the `mxnet` target above. The rule re-runs when that DLL changes.
add_custom_command(
OUTPUT ${CMAKE_BINARY_DIR}/warp_gen_cpp.cpp ${CMAKE_BINARY_DIR}/warp_gen.asm
COMMAND gen_warp $<TARGET_FILE:mxnet_${mxnet_first_arch}> WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/ DEPENDS $<TARGET_FILE:mxnet_${mxnet_first_arch}>)
else(USE_SPLIT_ARCH_DLL)
# Single-DLL mode: turn the NVCC_FLAGS_ARCH list into one space-separated string
# and use it as CMAKE_CUDA_FLAGS. Note that this set() replaces any value
# CMAKE_CUDA_FLAGS had before rather than appending to it.
string(REPLACE ";" " " NVCC_FLAGS_ARCH "${NVCC_FLAGS_ARCH}")
set(CMAKE_CUDA_FLAGS "${NVCC_FLAGS_ARCH}")
add_library(mxnet SHARED ${SOURCE})
# Same per-configuration -Xcompiler flags as in split mode, applied to `mxnet`.
target_compile_options(
mxnet
PRIVATE "$<$<AND:$<CONFIG:DEBUG>,$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=-MTd -Gy /bigobj>")
target_compile_options(
mxnet
PRIVATE "$<$<AND:$<CONFIG:RELEASE>,$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=-MT -Gy /bigobj>")

endif(USE_SPLIT_ARCH_DLL)
else()
# CUDA build, but not the FIRST_CUDA + MSVC combination: plain single library.
add_library(mxnet SHARED ${SOURCE})
endif()
else()
# Build without CUDA: plain single library.
add_library(mxnet SHARED ${SOURCE})
endif()

endif()

if(USE_DIST_KVSTORE)
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/ps-lite/CMakeLists.txt)
add_subdirectory("3rdparty/ps-lite")
list(APPEND pslite_LINKER_LIBS pslite protobuf)
target_link_libraries(mxnet PUBLIC debug ${pslite_LINKER_LIBS_DEBUG})
target_link_libraries(mxnet PUBLIC optimized ${pslite_LINKER_LIBS_RELEASE})
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
list(APPEND mxnet_LINKER_LIBS ${pslite_LINKER_LIBS_DEBUG})
else()
list(APPEND mxnet_LINKER_LIBS ${pslite_LINKER_LIBS_RELEASE})
endif()
target_link_libraries(mxnet PUBLIC debug ${pslite_LINKER_LIBS_DEBUG})
target_link_libraries(mxnet PUBLIC optimized ${pslite_LINKER_LIBS_RELEASE})

else()
set(pslite_LINKER_LIBS protobuf zmq-static)
endif()
Expand Down Expand Up @@ -735,13 +776,24 @@ if(USE_TVM_OP)
)
endif()

target_link_libraries(mxnet PUBLIC ${mxnet_LINKER_LIBS})

if(USE_PLUGINS_WARPCTC)
target_link_libraries(mxnet PUBLIC debug ${WARPCTC_LIB_DEBUG})
target_link_libraries(mxnet PUBLIC optimized ${WARPCTC_LIB_RELEASE})
list(APPEND mxnet_LINKER_LIBS ${WARPCTC_LIB})
endif()

# Attach the accumulated link libraries (${mxnet_LINKER_LIBS}) and dmlc to the
# target(s) that compile ${SOURCE}.
# With MSVC, FIRST_CUDA and USE_SPLIT_ARCH_DLL all set, that is every per-arch
# mxnet_<arch> library (there `mxnet` is built only from the wrapper sources);
# in every other configuration it is `mxnet` itself.
if(MSVC)
if(FIRST_CUDA AND USE_SPLIT_ARCH_DLL)
foreach(arch ${cuda_arch})
target_link_libraries(mxnet_${arch} PUBLIC ${mxnet_LINKER_LIBS})
target_link_libraries(mxnet_${arch} PUBLIC dmlc)
endforeach()
else()
target_link_libraries(mxnet PUBLIC ${mxnet_LINKER_LIBS})
target_link_libraries(mxnet PUBLIC dmlc)
endif()
else()
target_link_libraries(mxnet PUBLIC ${mxnet_LINKER_LIBS})
target_link_libraries(mxnet PUBLIC dmlc)
endif()

if(USE_OPENCV AND OpenCV_VERSION_MAJOR GREATER 2)
add_executable(im2rec "tools/im2rec.cc")
Expand All @@ -761,7 +813,6 @@ else()
is required for im2rec, im2rec will not be available")
endif()

target_link_libraries(mxnet PUBLIC dmlc)

if(MSVC AND USE_MXNET_LIB_NAMING)
set_target_properties(mxnet PROPERTIES OUTPUT_NAME "libmxnet")
Expand Down
19 changes: 19 additions & 0 deletions tools/windowsbuild/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<!--- Licensed to the Apache Software Foundation (ASF) under one -->
<!--- or more contributor license agreements. See the NOTICE file -->
<!--- distributed with this work for additional information -->
<!--- regarding copyright ownership. The ASF licenses this file -->
<!--- to you under the Apache License, Version 2.0 (the -->
<!--- "License"); you may not use this file except in compliance -->
<!--- with the License. You may obtain a copy of the License at -->

<!--- http://www.apache.org/licenses/LICENSE-2.0 -->

<!--- Unless required by applicable law or agreed to in writing, -->
<!--- software distributed under the License is distributed on an -->
<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
<!--- KIND, either express or implied. See the License for the -->
<!--- specific language governing permissions and limitations -->
<!--- under the License. -->

Because of the DLL size limitation on Windows, the MXNet DLL is split into separate DLLs, one per CUDA architecture.
Reference: https://github.com/apache/incubator-mxnet/pull/16980
209 changes: 209 additions & 0 deletions tools/windowsbuild/gen_warp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include <cstdarg>
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include <io.h>
#include <Windows.h>

#define IMAGE_SIZEOF_SIGNATURE 4


// Converts a relative virtual address (RVA) inside a PE image into the
// corresponding offset in the file on disk (FOA).
//
// RVA            - address relative to the image base, as stored in PE tables.
// section_header - pointer to the first entry of the image's section table.
//
// Returns RVA rebased from the containing section's VirtualAddress onto that
// section's PointerToRawData.
//
// The walk has no end marker: the caller must pass an RVA that lies inside one
// of the sections, otherwise the loop reads past the end of the section table.
DWORD rva_to_foa(IN DWORD RVA, IN PIMAGE_SECTION_HEADER section_header)
{
    // A section covers the half-open range
    // [VirtualAddress, VirtualAddress + VirtualSize), so an RVA equal to the
    // upper bound already belongs to a later section and must be skipped too.
    while (RVA >= section_header->VirtualAddress + section_header->Misc.VirtualSize)
    {
        ++section_header;
    }

    return RVA - section_header->VirtualAddress + section_header->PointerToRawData;
}

// printf-style formatting that returns the result as a std::string.
//
// format - printf format string.
// ...    - values consumed by the conversion specifiers in `format`.
//
// Returns the formatted text, or an empty string if formatting fails or
// produces no characters.
std::string format(const char* format, ...)
{
    va_list args;
    va_start(args, format);

    // Measuring the output consumes a va_list, so measure on a copy and keep
    // `args` untouched for the pass that actually writes the text.
    va_list measure_args;
    va_copy(measure_args, args);
    const int length = std::vsnprintf(nullptr, 0, format, measure_args);
    va_end(measure_args);

    std::string result;
    if (length > 0)
    {
        const size_t size = static_cast<size_t>(length) + 1;  // extra space for '\0'
        std::unique_ptr<char[]> buf(new char[size]);
        std::vsnprintf(buf.get(), size, format, args);
        result.assign(buf.get(), static_cast<size_t>(length));  // drop the '\0'
    }

    // Always reached now: the result is built first and returned afterwards.
    va_end(args);
    return result;
}

// Collects the exported names from a PE image that is mapped at `p`.
// Returns false, after reporting on stderr, when the headers do not look like
// a PE image of the bitness this tool was built for or when the image has no
// export table. The image contents are otherwise trusted: offsets read from it
// are not bounds-checked against the file size.
static bool parse_export_names(uint8_t* p, std::vector<std::string>& names)
{
    const PIMAGE_DOS_HEADER dos_header = reinterpret_cast<PIMAGE_DOS_HEADER>(p);
    if (dos_header->e_magic != IMAGE_DOS_SIGNATURE)
    {
        std::cerr << "gen_warp: input is not a PE image (bad DOS signature)\n";
        return false;
    }

    const PIMAGE_NT_HEADERS nt_headers =
        reinterpret_cast<PIMAGE_NT_HEADERS>(p + dos_header->e_lfanew);
    // The optional-header magic distinguishes PE32 from PE32+; the header
    // layout used below is only valid when it matches this build.
    if (nt_headers->Signature != IMAGE_NT_SIGNATURE ||
        nt_headers->OptionalHeader.Magic != IMAGE_NT_OPTIONAL_HDR_MAGIC)
    {
        std::cerr << "gen_warp: input is not a PE image of the expected format\n";
        return false;
    }

    // The section table starts right after the optional header.
    const PIMAGE_SECTION_HEADER section_table =
        reinterpret_cast<PIMAGE_SECTION_HEADER>(p + dos_header->e_lfanew +
                                                IMAGE_SIZEOF_SIGNATURE +
                                                IMAGE_SIZEOF_FILE_HEADER +
                                                nt_headers->FileHeader.SizeOfOptionalHeader);

    const IMAGE_DATA_DIRECTORY& export_entry =
        nt_headers->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT];
    if (export_entry.VirtualAddress == 0 || export_entry.Size == 0)
    {
        std::cerr << "gen_warp: input has no export table\n";
        return false;
    }

    const PIMAGE_EXPORT_DIRECTORY export_directory =
        reinterpret_cast<PIMAGE_EXPORT_DIRECTORY>(
            p + rva_to_foa(export_entry.VirtualAddress, section_table));

    const DWORD name_count = export_directory->NumberOfNames;
    if (name_count == 0)
    {
        return true;  // nothing exported by name; AddressOfNames is not meaningful
    }

    // AddressOfNames points at an array of RVAs, one per exported name.
    const DWORD* name_rvas = reinterpret_cast<const DWORD*>(
        p + rva_to_foa(export_directory->AddressOfNames, section_table));

    names.reserve(name_count);
    for (DWORD i = 0; i < name_count; ++i)
    {
        names.emplace_back(
            reinterpret_cast<const char*>(p + rva_to_foa(name_rvas[i], section_table)));
    }
    return true;
}

// Maps the file at `dll_path` read-only and fills `names` with its exported
// names. Every handle acquired here is released on all paths. Returns false,
// after reporting on stderr, on any failure.
static bool read_export_names(const char* dll_path, std::vector<std::string>& names)
{
    // Use the ANSI entry point explicitly: argv is narrow even when the
    // program is compiled with UNICODE defined.
    const HANDLE h_file = CreateFileA(
        dll_path,
        GENERIC_READ,
        FILE_SHARE_READ,
        nullptr,
        OPEN_EXISTING,
        FILE_ATTRIBUTE_NORMAL,
        nullptr);
    if (h_file == INVALID_HANDLE_VALUE)
    {
        std::cerr << "gen_warp: cannot open " << dll_path << "\n";
        return false;
    }

    // A maximum size of 0/0 makes the mapping cover the current file size.
    const HANDLE h_map_file = CreateFileMappingA(
        h_file,
        nullptr,
        PAGE_READONLY,
        0,
        0,
        nullptr);
    if (h_map_file == nullptr)
    {
        std::cerr << "gen_warp: cannot create a file mapping for " << dll_path << "\n";
        CloseHandle(h_file);
        return false;
    }

    void* pv_file = MapViewOfFile(
        h_map_file,
        FILE_MAP_READ,
        0,
        0,
        0);
    if (pv_file == nullptr)
    {
        std::cerr << "gen_warp: cannot map " << dll_path << " into memory\n";
        CloseHandle(h_map_file);
        CloseHandle(h_file);
        return false;
    }

    const bool ok = parse_export_names(static_cast<uint8_t*>(pv_file), names);

    UnmapViewOfFile(pv_file);
    CloseHandle(h_map_file);
    CloseHandle(h_file);
    return ok;
}

// Writes warp_gen_cpp.cpp into the current directory. For export number i it
// declares the pointer warp_point_<i>, re-exports the original name as
// warp_func_<i>, and emits load_function(), which fills every pointer through
// GetProcAddress. Returns false if the file cannot be written.
static bool write_cpp_wrappers(const std::vector<std::string>& func_list)
{
    std::ofstream gen_cpp_obj("warp_gen_cpp.cpp", std::ios::out | std::ios::trunc);
    if (!gen_cpp_obj.is_open())
    {
        std::cerr << "gen_warp: cannot create warp_gen_cpp.cpp\n";
        return false;
    }

    gen_cpp_obj << "#include <Windows.h>\n";
    gen_cpp_obj << "extern \"C\" \n{\n";
    for (size_t i = 0; i < func_list.size(); ++i)
    {
        gen_cpp_obj << "void * warp_point_" << i << ";\n";
        gen_cpp_obj << "#pragma comment(linker, \"/export:" << func_list[i]
                    << "=warp_func_" << i << "\")\n";
        gen_cpp_obj << "void warp_func_" << i << "();\n";
        gen_cpp_obj << "\n";
    }
    gen_cpp_obj << "}\n";

    gen_cpp_obj << "void load_function(HMODULE hm)\n{\n";
    for (size_t i = 0; i < func_list.size(); ++i)
    {
        gen_cpp_obj << "warp_point_" << i << " = (void*)GetProcAddress(hm, \""
                    << func_list[i] << "\");\n";
    }
    gen_cpp_obj << "}\n";

    gen_cpp_obj.close();
    if (gen_cpp_obj.fail())
    {
        std::cerr << "gen_warp: error while writing warp_gen_cpp.cpp\n";
        return false;
    }
    return true;
}

// Writes warp_gen.asm into the current directory: one warp_func_<i> procedure
// per export, each consisting of a single jump through warp_point_<i>.
// Returns false if the file cannot be written.
static bool write_asm_thunks(const std::vector<std::string>& func_list)
{
    std::ofstream gen_asm_obj("warp_gen.asm", std::ios::out | std::ios::trunc);
    if (!gen_asm_obj.is_open())
    {
        std::cerr << "gen_warp: cannot create warp_gen.asm\n";
        return false;
    }

    for (size_t i = 0; i < func_list.size(); ++i)
    {
        gen_asm_obj << "EXTERN warp_point_" << i << ":QWORD;\n";
    }
    gen_asm_obj << ".CODE\n";
    for (size_t i = 0; i < func_list.size(); ++i)
    {
        gen_asm_obj << "warp_func_" << i << " PROC\njmp warp_point_" << i
                    << ";\nwarp_func_" << i << " ENDP\n";
    }
    gen_asm_obj << "END\n";

    gen_asm_obj.close();
    if (gen_asm_obj.fail())
    {
        std::cerr << "gen_warp: error while writing warp_gen.asm\n";
        return false;
    }
    return true;
}

// Entry point. Usage: gen_warp <path-to-dll>
//
// Reads the export table of the given DLL and generates warp_gen_cpp.cpp and
// warp_gen.asm in the current working directory.
//
// Exit code: 0 on success, 1 on any failure, so that a build rule invoking
// this tool stops instead of continuing with missing or stale outputs.
int main(int argc, char* argv[])
{
    if (argc != 2)
    {
        std::cerr << "usage: gen_warp <path-to-dll>\n";
        return 1;
    }

    std::vector<std::string> func_list;
    if (!read_export_names(argv[1], func_list))
    {
        return 1;
    }

    if (!write_cpp_wrappers(func_list) || !write_asm_thunks(func_list))
    {
        return 1;
    }

    return 0;
}
Loading

0 comments on commit f366d9e

Please sign in to comment.