Skip to content

Commit

Permalink
extract cmake arrow (facebookincubator#169)
Browse files Browse the repository at this point in the history
cmake policy
  • Loading branch information
marin-ma authored May 24, 2022
1 parent 724c683 commit b247d5c
Show file tree
Hide file tree
Showing 6 changed files with 321 additions and 380 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/unittests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ jobs:
cd cpp/
mkdir -p build
cd build
cmake .. -DBUILD_ARROW=0 -DTESTS=1
cmake .. -DBUILD_ARROW=0 -DBUILD_TESTS=1
make
# velox-backend-test:
Expand Down
137 changes: 137 additions & 0 deletions cpp/CMake/BuildArrow.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# ExternalProject supplies ExternalProject_Add / ExternalProject_Add_Step,
# which the rest of this module uses to fetch and build Arrow.
include(ExternalProject)

# Both the Arrow checkout and its install tree live under the current binary
# directory.
set(ARROW_EP_SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep")
set(ARROW_EP_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-install")
message(STATUS "ARROW_EP_INSTALL_PREFIX: ${ARROW_EP_INSTALL_PREFIX}")
message(STATUS "ARROW_EP_SOURCE_DIR: ${ARROW_EP_SOURCE_DIR}")

# Header directory of the installed Arrow, and the directory that collects the
# released artifacts.
set(ARROW_INCLUDE_DIR "${ARROW_EP_INSTALL_PREFIX}/include")
set(BINARY_RELEASE_DIR "${root_directory}/releases")

# Fetch the oap-project Arrow fork at a pinned tag and build its C++ tree
# (the `cpp` sub-directory) in-source, installing into ARROW_EP_INSTALL_PREFIX.
ExternalProject_Add(
  arrow_ep
  GIT_REPOSITORY https://github.com/oap-project/arrow.git
  GIT_TAG arrow-8.0.0-gluten-20220427a
  SOURCE_DIR ${ARROW_EP_SOURCE_DIR}
  SOURCE_SUBDIR cpp
  BUILD_IN_SOURCE 1
  INSTALL_DIR ${ARROW_EP_INSTALL_PREFIX}
  CMAKE_ARGS
    # Install layout: libraries always go to <prefix>/lib.
    -DCMAKE_INSTALL_PREFIX=${ARROW_EP_INSTALL_PREFIX}
    -DCMAKE_INSTALL_LIBDIR=lib
    # Shared libraries only; third-party dependencies are bundled.
    -DARROW_BUILD_STATIC=OFF
    -DARROW_BUILD_SHARED=ON
    -DARROW_DEPENDENCY_SOURCE=BUNDLED
    -DARROW_BOOST_USE_SHARED=OFF
    # Arrow components.
    -DARROW_SUBSTRAIT=ON
    -DARROW_COMPUTE=ON
    -DARROW_DATASET=ON
    -DARROW_GANDIVA=ON
    -DARROW_GANDIVA_JAVA=ON
    -DARROW_JNI=ON
    -DARROW_PARQUET=ON
    -DARROW_CSV=ON
    -DARROW_JSON=ON
    -DARROW_FLIGHT=OFF
    # File systems.
    -DARROW_FILESYSTEM=ON
    -DARROW_S3=ON
    -DARROW_HDFS=ON
    # Serialization / compression back ends.
    -DARROW_WITH_PROTOBUF=ON
    -DARROW_WITH_SNAPPY=ON
    -DARROW_WITH_LZ4=ON
    -DARROW_WITH_FASTPFOR=ON
    -DARROW_WITH_ZSTD=OFF
    -DARROW_WITH_BROTLI=OFF
    -DARROW_WITH_ZLIB=OFF
    # Allocator and SIMD settings.
    -DARROW_JEMALLOC=ON
    -DARROW_SIMD_LEVEL=AVX2
    -DARROW_RUNTIME_SIMD_LEVEL=MAX
)

# Extra step of arrow_ep: configure and build the CMake project under
# <arrow source>/java/c (the Arrow Java C Data Interface) in a `build`
# sub-directory, after the C++ libraries have been installed.
#
# The commands run through ${CMAKE_COMMAND} so that the same CMake executable
# that drives this build is used (instead of whichever `cmake` happens to be
# first on PATH), and they no longer need a POSIX shell.
ExternalProject_Add_Step(
  arrow_ep java_c_abi
  COMMAND "${CMAKE_COMMAND}" -E make_directory build
  COMMAND "${CMAKE_COMMAND}" -S . -B build
  COMMAND "${CMAKE_COMMAND}" --build build
  COMMENT "Build Arrow Java C Data Interface"
  DEPENDEES mkdir download update patch configure build install
  WORKING_DIRECTORY "${ARROW_EP_SOURCE_DIR}/java/c"
)

# Extra step of arrow_ep: run the Maven install of the Arrow Java modules with
# the `arrow-jni` profile, pointing Maven at the freshly installed C++
# libraries. Runs after the java_c_abi step; tests and checkstyle are skipped.
ExternalProject_Add_Step(
  arrow_ep java_install
  COMMAND
    mvn clean install
    -P arrow-jni
    -am
    -Darrow.cpp.build.dir=${ARROW_EP_INSTALL_PREFIX}/lib
    -DskipTests
    -Dcheckstyle.skip
  COMMENT "Arrow Java maven install after CPP make install"
  DEPENDEES mkdir download update patch configure build install java_c_abi
  WORKING_DIRECTORY "${ARROW_EP_SOURCE_DIR}/java"
)

# Copy the installed Arrow headers to releases/include.
#
# The destination directory is named explicitly and the copy goes through
# `cmake -E copy_directory` (which creates the destination when needed).
# A plain `cp -rf <prefix>/include/ <root>/releases/` puts the headers in
# different places depending on whether releases/ already exists and on how
# the platform's cp treats a trailing slash on the source directory.
ExternalProject_Add_Step(
  arrow_ep copy_arrow_header
  COMMAND "${CMAKE_COMMAND}" -E copy_directory
          "${ARROW_EP_INSTALL_PREFIX}/include"
          "${root_directory}/releases/include"
  COMMENT "Arrow Header to releases/include"
  DEPENDEES mkdir download update patch configure build install java_install
  WORKING_DIRECTORY "${ARROW_EP_INSTALL_PREFIX}/"
)

# Report whether a static Arrow was requested.
message(STATUS "Building Static ARROW: ${STATIC_ARROW}")

# Build the jni_proto target before arrow_ep.
# NOTE(review): jni_proto is not defined in this module - presumably it is
# created by another directory of the project; confirm.
add_dependencies(arrow_ep jni_proto)

if(STATIC_ARROW)
  # Static linking against Arrow is not supported: configuration stops here.
  message(FATAL_ERROR "Not Support Static Arrow")

  # ---------------------------------------------------------------------------
  # Everything below is unreachable because of the FATAL_ERROR above. It
  # sketches how the static libraries from the arrow_ep install tree would be
  # wrapped in imported targets and linked into spark_columnar_jni.
  # ---------------------------------------------------------------------------

  set(THREADS_PREFER_PTHREAD_FLAG ON)
  find_package(Threads REQUIRED)

  # Static Arrow library (the bundled-dependencies archive).
  set(ARROW_LIB_NAME arrow_bundled_dependencies)
  set(ARROW_STATIC_LIB
      "${ARROW_EP_INSTALL_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${ARROW_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
  )
  add_library(Arrow::arrow STATIC IMPORTED)
  set_target_properties(
    Arrow::arrow
    PROPERTIES IMPORTED_LOCATION "${ARROW_STATIC_LIB}"
               INTERFACE_INCLUDE_DIRECTORIES "${ARROW_EP_INSTALL_PREFIX}/include")
  add_dependencies(Arrow::arrow arrow_ep)

  # Static Arrow dataset JNI library.
  set(ARROW_DATASET_JNI_STATIC_LIB
      "${ARROW_EP_INSTALL_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${ARROW_DATASET_JNI_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
  )
  add_library(Arrow::arrow_dataset_jni STATIC IMPORTED)
  set_target_properties(
    Arrow::arrow_dataset_jni
    PROPERTIES IMPORTED_LOCATION "${ARROW_DATASET_JNI_STATIC_LIB}"
               INTERFACE_INCLUDE_DIRECTORIES "${ARROW_EP_INSTALL_PREFIX}/include")
  add_dependencies(Arrow::arrow_dataset_jni arrow_ep)

  # Static Gandiva library.
  set(GANDIVA_STATIC_LIB
      "${ARROW_EP_INSTALL_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${GANDIVA_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
  )
  add_library(Arrow::gandiva STATIC IMPORTED)
  set_target_properties(
    Arrow::gandiva
    PROPERTIES IMPORTED_LOCATION "${GANDIVA_STATIC_LIB}"
               INTERFACE_INCLUDE_DIRECTORIES "${ARROW_EP_INSTALL_PREFIX}/include")
  add_dependencies(Arrow::gandiva arrow_ep)

  target_link_libraries(
    spark_columnar_jni
    PRIVATE Arrow::arrow Arrow::arrow_dataset_jni Arrow::gandiva Threads::Threads)
endif()
95 changes: 95 additions & 0 deletions cpp/CMake/ConfigArrow.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# File-name suffixes of the Arrow shared libraries: the short (so-version)
# suffix and the longer, fully versioned "parent" suffix.
set(ARROW_SHARED_LIBRARY_SUFFIX ".so.800")
set(ARROW_SHARED_LIBRARY_PARENT_SUFFIX ".so.800.0.0")

# Base names (without prefix/suffix) of the Arrow libraries.
set(ARROW_LIB_NAME "arrow")
set(ARROW_DATASET_LIB_NAME "arrow_dataset")
set(ARROW_DATASET_JNI_LIB_NAME "arrow_dataset_jni")
set(ARROW_SUBSTRAIT_LIB_NAME "arrow_substrait")
set(GANDIVA_LIB_NAME "gandiva")
set(PARQUET_LIB_NAME "parquet")

# FIND_ARROW_LIB(<LIB_NAME>)
#
# Defines the imported shared-library target Arrow::<LIB_NAME>, whose location
# is ${root_directory}/releases/<prefix><LIB_NAME>${ARROW_SHARED_LIBRARY_SUFFIX}
# and whose include directory is ${root_directory}/releases/include. Calling it
# again for a target that already exists does nothing.
#
#   LIB_NAME  Base name of the library, e.g. arrow or gandiva.
#
# With BUILD_ARROW on, a custom command copies the library (and the fully
# versioned file next to it) from the arrow_ep install tree into releases/ at
# build time. Otherwise the library is searched for in ARROW_BFS_LIB_DIR,
# ARROW_LIB_DIR and ARROW_LIB64_DIR only, copied into releases/ at configure
# time, and configuration fails when it cannot be found.
function(FIND_ARROW_LIB LIB_NAME)
  if(TARGET Arrow::${LIB_NAME})
    return()
  endif()

  set(ARROW_LIB_FULL_NAME ${CMAKE_SHARED_LIBRARY_PREFIX}${LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX})

  # Make sure releases/ and releases/include exist already at configure time:
  # CMake refuses to generate when an imported target lists a non-existent
  # INTERFACE_INCLUDE_DIRECTORIES entry, and the copy commands below need
  # releases/ as an existing destination directory.
  file(MAKE_DIRECTORY "${root_directory}/releases/include")

  add_library(Arrow::${LIB_NAME} SHARED IMPORTED)
  set_target_properties(
    Arrow::${LIB_NAME}
    PROPERTIES IMPORTED_LOCATION "${root_directory}/releases/${ARROW_LIB_FULL_NAME}"
               INTERFACE_INCLUDE_DIRECTORIES "${root_directory}/releases/include")

  if(BUILD_ARROW)
    set(ARROW_LIB_PARENT_FULL_NAME ${CMAKE_SHARED_LIBRARY_PREFIX}${LIB_NAME}${ARROW_SHARED_LIBRARY_PARENT_SUFFIX})
    # `cp -a` is kept on purpose: it copies a symbolic link as a link instead
    # of duplicating the file it points to.
    add_custom_command(
      OUTPUT "${root_directory}/releases/${ARROW_LIB_PARENT_FULL_NAME}"
             "${root_directory}/releases/${ARROW_LIB_FULL_NAME}"
      COMMAND cp -a "${ARROW_EP_INSTALL_PREFIX}/lib/${ARROW_LIB_PARENT_FULL_NAME}" "${root_directory}/releases/"
      COMMAND cp -a "${ARROW_EP_INSTALL_PREFIX}/lib/${ARROW_LIB_FULL_NAME}" "${root_directory}/releases/"
      DEPENDS arrow_ep
      VERBATIM)
    add_custom_target(COPY_LIB_${LIB_NAME} ALL
                      DEPENDS "${root_directory}/releases/${ARROW_LIB_FULL_NAME}")
    # Anything that links Arrow::<LIB_NAME> waits for the copy to finish.
    add_dependencies(Arrow::${LIB_NAME} COPY_LIB_${LIB_NAME})
  else()
    find_library(ARROW_LIB_${LIB_NAME}
                 NAMES ${ARROW_LIB_FULL_NAME}
                 PATHS ${ARROW_BFS_LIB_DIR} ${ARROW_LIB_DIR} ${ARROW_LIB64_DIR}
                 NO_DEFAULT_PATH)
    if(NOT ARROW_LIB_${LIB_NAME})
      message(FATAL_ERROR "Arrow Library Not Found: ${ARROW_LIB_FULL_NAME}")
    else()
      message(STATUS "Found Arrow Library: ${ARROW_LIB_${LIB_NAME}}")
    endif()
    # Copy the library together with every link of its symlink chain.
    file(COPY ${ARROW_LIB_${LIB_NAME}} DESTINATION ${root_directory}/releases/ FOLLOW_SYMLINK_CHAIN)
  endif()
endfunction()

if(BUILD_ARROW)
  # Build Arrow from source through the BuildArrow module.
  include(BuildArrow)
else()
  message(STATUS "Use existing ARROW libraries")

  # First candidate: an Arrow that was built from source and installed.
  set(ARROW_BFS_INSTALL_DIR "/usr/local" CACHE PATH "Arrow Build from Source dir")
  set(ARROW_BFS_LIB_DIR "${ARROW_BFS_INSTALL_DIR}/lib")
  set(ARROW_BFS_INCLUDE_DIR "${ARROW_BFS_INSTALL_DIR}/include")

  # Second candidate: a generic Arrow root with lib/, lib64/ and include/.
  set(ARROW_ROOT "/usr/local" CACHE PATH "Arrow Root dir")
  set(ARROW_LIB_DIR "${ARROW_ROOT}/lib")
  set(ARROW_LIB64_DIR "${ARROW_ROOT}/lib64")
  set(ARROW_INCLUDE_DIR "${ARROW_ROOT}/include")

  message(STATUS "Set Arrow Library Directory in ${ARROW_BFS_LIB_DIR} or ${ARROW_LIB_DIR} or ${ARROW_LIB64_DIR}")
  message(STATUS "Set Arrow Include Directory in ${ARROW_BFS_INCLUDE_DIR} or ${ARROW_INCLUDE_DIR}")

  # Use the first candidate include directory that contains an arrow/ folder.
  if(EXISTS "${ARROW_BFS_INCLUDE_DIR}/arrow")
    set(ARROW_INCLUDE_SRC_DIR ${ARROW_BFS_INCLUDE_DIR})
  elseif(EXISTS "${ARROW_INCLUDE_DIR}/arrow")
    set(ARROW_INCLUDE_SRC_DIR ${ARROW_INCLUDE_DIR})
  else()
    message(FATAL_ERROR "Arrow headers not found either in ARROW_BFS_INSTALL_DIR or ARROW_ROOT.")
  endif()

  # Copy the selected header sub-directories into releases/include at
  # configure time.
  set(ARROW_INCLUDE_DST_DIR ${root_directory}/releases/include)
  set(ARROW_INCLUDE_SUB_DIR arrow gandiva jni parquet)
  message(STATUS "Copy Arrow headers from ${ARROW_INCLUDE_SRC_DIR} to ${ARROW_INCLUDE_DST_DIR}")
  file(MAKE_DIRECTORY ${ARROW_INCLUDE_DST_DIR})
  foreach(SUB_DIR IN LISTS ARROW_INCLUDE_SUB_DIR)
    file(COPY ${ARROW_INCLUDE_SRC_DIR}/${SUB_DIR} DESTINATION ${ARROW_INCLUDE_DST_DIR})
  endforeach()
endif()
86 changes: 77 additions & 9 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,25 @@
cmake_minimum_required(VERSION 3.16)
message(STATUS "Building using CMake version: ${CMAKE_VERSION}")

if (POLICY CMP0126)
# The set(CACHE) command does not remove any normal variable of the same name from the current scope
# https://cmake.org/cmake/help/latest/policy/CMP0126.html
if(POLICY CMP0126)
cmake_policy(SET CMP0126 NEW)
endif ()
endif()

if (NOT DEFINED CMAKE_BUILD_TYPE)
if(NOT DEFINED CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE
Release
CACHE STRING "Choose the type of build.")
endif ()
endif()

# Put this project's CMake/ directory at the front of the module search path
# so that include(<module>) can find the modules stored there.
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMake")

project(spark_columnar_plugin)

# Build switches; every one of them defaults to OFF.
option(DEBUG "Enable Debug Info" OFF)
option(BUILD_TESTS "Build Tests" OFF)
option(BUILD_BENCHMARKS "Build Benchmarks" OFF)
option(BUILD_GAZELLE_CPP "Build the gazelle-cpp libraries" OFF)

# Root of the build tree of this project.
set(root_directory ${PROJECT_BINARY_DIR})
Expand All @@ -40,20 +45,83 @@ set(root_directory ${PROJECT_BINARY_DIR})
# Compiler flags
#

if (DEBUG)
if(DEBUG)
add_definitions(-DDEBUG)
endif ()
endif()

#
# Dependencies
#

# Arrow: built from source by default; the static variant is off by default.
# Both options are declared before ConfigArrow is included.
option(BUILD_ARROW "Build Arrow from Source" ON)
option(STATIC_ARROW "Build Arrow with Static Libraries" OFF)

# Loads the ConfigArrow module (looked up through CMAKE_MODULE_PATH).
include(ConfigArrow)

# Threads: ask for the -pthread flag where available; provides Threads::Threads.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)

# JNI headers and libraries are mandatory.
find_package(JNI REQUIRED)

# Test support: the GoogleTest module provides gtest_discover_tests(), and
# enable_testing() turns on CTest for this directory and below.
if(BUILD_TESTS)
include(GoogleTest)
enable_testing()
endif()

# ADD_TEST_CASE(<TEST_NAME>
#               SOURCES <file>...
#               [EXTRA_LINK_LIBS <lib>...]
#               [EXTRA_INCLUDES <dir>...]
#               [EXTRA_DEPENDENCIES <target>...])
#
# Creates the executable <TEST_NAME> from SOURCES, links it against
# spark_columnar_jni, gtest, gtest_main and Threads::Threads, adds
# ${CMAKE_SOURCE_DIR}/src as a private include directory and registers its
# test cases through gtest_discover_tests().
#
#   SOURCES             Source files of the test; mandatory (fatal error when
#                       missing).
#   EXTRA_LINK_LIBS     Additional libraries to link.
#   EXTRA_INCLUDES      Additional include directories (added as SYSTEM PUBLIC).
#   EXTRA_DEPENDENCIES  Targets that must be built before the test.
function(ADD_TEST_CASE TEST_NAME)
  set(flag_keywords)
  set(single_value_keywords)
  set(multi_value_keywords SOURCES EXTRA_LINK_LIBS EXTRA_INCLUDES EXTRA_DEPENDENCIES)
  cmake_parse_arguments(ARG
                        "${flag_keywords}"
                        "${single_value_keywords}"
                        "${multi_value_keywords}"
                        ${ARGN})

  # A test without sources cannot be built.
  if(NOT ARG_SOURCES)
    message(FATAL_ERROR "No sources specified for test ${TEST_NAME}")
  endif()

  add_executable(${TEST_NAME} ${ARG_SOURCES})
  target_link_libraries(${TEST_NAME} spark_columnar_jni gtest gtest_main Threads::Threads)
  target_include_directories(${TEST_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/src)

  # Optional extras supplied by the caller.
  if(ARG_EXTRA_LINK_LIBS)
    target_link_libraries(${TEST_NAME} ${ARG_EXTRA_LINK_LIBS})
  endif()
  if(ARG_EXTRA_INCLUDES)
    target_include_directories(${TEST_NAME} SYSTEM PUBLIC ${ARG_EXTRA_INCLUDES})
  endif()
  if(ARG_EXTRA_DEPENDENCIES)
    add_dependencies(${TEST_NAME} ${ARG_EXTRA_DEPENDENCIES})
  endif()

  gtest_discover_tests(${TEST_NAME})
endfunction()

# Benchmarks need the `benchmark` package, located in config mode; it is
# mandatory once BUILD_BENCHMARKS is on.
if(BUILD_BENCHMARKS)
  find_package(benchmark CONFIG REQUIRED)
endif()

#
# Subdirectories
#

add_subdirectory(src)

if (BUILD_GAZELLE_CPP)
if(BUILD_GAZELLE_CPP)
add_subdirectory(gazelle-cpp)
endif ()
endif()

if (BUILD_VELOX)
if(BUILD_VELOX)
add_subdirectory(velox)
endif()
Loading

0 comments on commit b247d5c

Please sign in to comment.