Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PTen] Add cmake function for kernels #38311

Merged
merged 10 commits into from
Dec 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/inference_lib.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ copy(inference_lib_dist
# the header file of pten is copied to the experimental directory,
# the include path of pten needs to be changed to adapt to inference api path
add_custom_command(TARGET inference_lib_dist POST_BUILD
COMMAND ${CMAKE_COMMAND} -P "${PADDLE_SOURCE_DIR}/cmake/pten.cmake"
COMMAND ${CMAKE_COMMAND} -P "${PADDLE_SOURCE_DIR}/cmake/pten_header.cmake"
COMMENT "Change pten header include path to adapt to inference api path")

# CAPI inference library for only inference
Expand Down
File renamed without changes.
175 changes: 175 additions & 0 deletions cmake/pten_kernel.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# call kernel_declare need to make sure the target of input is exists
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

后续pr 中可以删除注释中的 is

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, thx

function(kernel_declare TARGET_LIST)
foreach(kernel_path ${TARGET_LIST})
file(READ ${kernel_path} kernel_impl)
# TODO(chenweihang): rename PT_REGISTER_CTX_KERNEL to PT_REGISTER_KERNEL
# NOTE(chenweihang): now we don't recommend to use digit in kernel name
string(REGEX MATCH "PT_REGISTER_CTX_KERNEL\\([ \t\r\n]*[a-z_]*," first_registry "${kernel_impl}")
if (NOT first_registry STREQUAL "")
# parse the first kernel name
string(REPLACE "PT_REGISTER_CTX_KERNEL(" "" kernel_name "${first_registry}")
string(REPLACE "," "" kernel_name "${kernel_name}")
string(REGEX REPLACE "[ \t\r\n]+" "" kernel_name "${kernel_name}")
# append kernel declare into declarations.h
# TODO(chenweihang): default declare ALL_LAYOUT for each kernel
if (${kernel_path} MATCHES "./cpu\/")
file(APPEND ${kernel_declare_file} "PT_DECLARE_KERNEL(${kernel_name}, CPU, ALL_LAYOUT);\n")
elseif (${kernel_path} MATCHES "./gpu\/")
file(APPEND ${kernel_declare_file} "PT_DECLARE_KERNEL(${kernel_name}, GPU, ALL_LAYOUT);\n")
elseif (${kernel_path} MATCHES "./xpu\/")
file(APPEND ${kernel_declare_file} "PT_DECLARE_KERNEL(${kernel_name}, XPU, ALL_LAYOUT);\n")
elseif (${kernel_path} MATCHES "./npu\/*")
file(APPEND ${kernel_declare_file} "PT_DECLARE_KERNEL(${kernel_name}, NPU, ALL_LAYOUT);\n")
else ()
# deal with device independent kernel, now we use CPU temporaary
file(APPEND ${kernel_declare_file} "PT_DECLARE_KERNEL(${kernel_name}, CPU, ALL_LAYOUT);\n")
endif()
endif()
endforeach()
endfunction()

function(kernel_library TARGET)
set(common_srcs)
set(cpu_srcs)
set(gpu_srcs)
set(xpu_srcs)
set(npu_srcs)

set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(kernel_library "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})

list(LENGTH kernel_library_SRCS kernel_library_SRCS_len)
# one kernel only match one impl file in each backend
# TODO(chenweihang): parse compile deps by include headers
if (${kernel_library_SRCS_len} EQUAL 0)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc)
list(APPEND common_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc)
endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc)
list(APPEND cpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc)
endif()
if (WITH_GPU OR WITH_ROCM)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu)
list(APPEND gpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu)
endif()
endif()
if (WITH_XPU)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/xpu/${TARGET}.cc)
list(APPEND xpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/xpu/${TARGET}.cc)
endif()
endif()
if (WITH_ASCEND_CL)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/npu/${TARGET}.cc)
list(APPEND npu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/npu/${TARGET}.cc)
endif()
endif()
else()
# TODO(chenweihang): impl compile by source later
endif()

list(LENGTH common_srcs common_srcs_len)
list(LENGTH cpu_srcs cpu_srcs_len)
list(LENGTH gpu_srcs gpu_srcs_len)
list(LENGTH xpu_srcs xpu_srcs_len)
list(LENGTH npu_srcs npu_srcs_len)

if (${common_srcs_len} GREATER 0)
# If the kernel has a device independent public implementation,
# we will use this implementation and will not adopt the implementation
# under specific devices
if (WITH_GPU)
nv_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS})
elseif (WITH_ROCM)
hip_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS})
else()
cc_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS})
endif()
else()
# If the kernel has a header file declaration, but no corresponding
# implementation can be found, this is not allowed
if (${cpu_srcs_len} EQUAL 0 AND ${gpu_srcs_len} EQUAL 0 AND
${xpu_srcs_len} EQUAL 0 AND ${npu_srcs_len} EQUAL 0)
message(FATAL_ERROR "Cannot find any implementation for ${TARGET}")
else()
if (WITH_GPU)
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0)
nv_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS})
endif()
elseif (WITH_ROCM)
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0)
hip_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS})
endif()
else()
if (${cpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${npu_srcs_len} GREATER 0)
cc_library(${TARGET} SRCS ${cpu_srcs} ${xpu_srcs} ${npu_srcs} DEPS ${kernel_library_DEPS})
endif()
endif()
endif()
endif()

if (${common_srcs_len} GREATER 0 OR ${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR
${xpu_srcs_len} GREATER 0 OR ${npu_srcs_len} GREATER 0)
# append target into PTEN_KERNELS property
get_property(pten_kernels GLOBAL PROPERTY PTEN_KERNELS)
set(pten_kernels ${pten_kernels} ${TARGET})
set_property(GLOBAL PROPERTY PTEN_KERNELS ${pten_kernels})
endif()

# parse kernel name and auto generate kernel declaration
# here, we don't need to check WITH_XXX, because if not WITH_XXX, the
# xxx_srcs_len will be equal to 0
if (${common_srcs_len} GREATER 0)
kernel_declare(${common_srcs})
endif()
if (${cpu_srcs_len} GREATER 0)
kernel_declare(${cpu_srcs})
endif()
if (${gpu_srcs_len} GREATER 0)
kernel_declare(${gpu_srcs})
endif()
if (${xpu_srcs_len} GREATER 0)
kernel_declare(${xpu_srcs})
endif()
if (${npu_srcs_len} GREATER 0)
kernel_declare(${npu_srcs})
endif()
endfunction()

function(register_kernels)
set(options "")
set(oneValueArgs "")
set(multiValueArgs EXCLUDES DEPS)
cmake_parse_arguments(register_kernels "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})

file(GLOB KERNELS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_kernel.h")
string(REPLACE ".h" "" KERNELS "${KERNELS}")
list(LENGTH register_kernels_DEPS register_kernels_DEPS_len)

foreach(target ${KERNELS})
list(FIND register_kernels_EXCLUDES ${target} _index)
if (${_index} EQUAL -1)
if (${register_kernels_DEPS_len} GREATER 0)
kernel_library(${target} DEPS ${register_kernels_DEPS})
else()
kernel_library(${target})
endif()
endif()
endforeach()
endfunction()
6 changes: 4 additions & 2 deletions paddle/pten/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@ add_subdirectory(tests)

# make an unity target for compile deps
set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_context)
set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu conj_kernel_cpu scale_kernel_cpu full_kernel_cpu flatten)
get_property(pten_kernels GLOBAL PROPERTY PTEN_KERNELS)
set(PTEN_DEPS ${PTEN_DEPS} ${pten_kernels})
set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu conj_kernel_cpu)
set(PTEN_DEPS ${PTEN_DEPS} nary unary binary)
if(WITH_GPU OR WITH_ROCM)
set(PTEN_DEPS ${PTEN_DEPS} math_gpu linalg_gpu manipulation_gpu conj_kernel_gpu scale_kernel_gpu full_kernel_gpu)
set(PTEN_DEPS ${PTEN_DEPS} math_gpu linalg_gpu manipulation_gpu conj_kernel_gpu)
endif()
if(WITH_XPU)
set(PTEN_DEPS ${PTEN_DEPS} manipulation_xpu)
Expand Down
7 changes: 0 additions & 7 deletions paddle/pten/api/lib/kernel_declare.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,18 @@ limitations under the License. */
// the kernel declare statement is automatically generated according to the
// file name of the kernel, and this header file will be removed

PT_DECLARE_KERNEL(full_like, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(dot, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(cast, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(flatten, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(sign, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(scale, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(conj, CPU, ALL_LAYOUT);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_KERNEL(full_like, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(dot, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(cast, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(flatten, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(sign, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(scale, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(conj, GPU, ALL_LAYOUT);
#endif

#ifdef PADDLE_WITH_XPU
PT_DECLARE_KERNEL(flatten, XPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(reshape, XPU, ALL_LAYOUT);
#endif
29 changes: 21 additions & 8 deletions paddle/pten/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
include(pten_kernel)

set(kernel_declare_file ${PADDLE_BINARY_DIR}/paddle/pten/kernels/declarations.h.tmp CACHE INTERNAL "declarations.h file")
set(kernel_declare_file_final ${PADDLE_BINARY_DIR}/paddle/pten/kernels/declarations.h)
file(WRITE ${kernel_declare_file} "// Generated by the paddle/pten/kernels/CMakeLists.txt. DO NOT EDIT!\n\n#pragma once\n\n")

# kernel primitive api
add_subdirectory(primitive)
# pten hybird functors and functions called by kernels
Expand All @@ -18,17 +24,24 @@ if(WITH_XPU)
add_subdirectory(xpu)
endif()

set(FLATTEN_DEPS dense_tensor kernel_context kernel_factory utils_cpu unary)
# pten depends all pten kernel targets
set_property(GLOBAL PROPERTY PTEN_KERNELS "")

set(COMMON_KERNEL_DEPS dense_tensor kernel_context kernel_factory)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function)

# auto build kernel targets by cmake
register_kernels(EXCLUDES flatten_kernel DEPS ${COMMON_KERNEL_DEPS})
# TODO(chenweihang): auto parse compile deps by include headers later
set(FLATTEN_DEPS ${COMMON_KERNEL_DEPS} utils_cpu unary)
if(WITH_GPU OR WITH_ROCM)
set(FLATTEN_DEPS ${FLATTEN_DEPS} utils_gpu)
elseif(WITH_XPU)
set(FLATTEN_DEPS ${FLATTEN_DEPS} utils_xpu)
endif()
kernel_library(flatten_kernel DEPS ${FLATTEN_DEPS})

if(WITH_GPU)
nv_library(flatten SRCS flatten_kernel.cc DEPS ${FLATTEN_DEPS})
elseif(WITH_ROCM)
hip_library(flatten SRCS flatten_kernel.cc DEPS ${FLATTEN_DEPS})
else()
cc_library(flatten SRCS flatten_kernel.cc DEPS ${FLATTEN_DEPS})
endif()
get_property(pten_kernels GLOBAL PROPERTY PTEN_KERNELS)
message(STATUS "PTEN_KERNELS: ${pten_kernels}")

copy_if_different(${kernel_declare_file} ${kernel_declare_file_final})
2 changes: 0 additions & 2 deletions paddle/pten/kernels/cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,4 @@ cc_library(math_cpu SRCS math.cc DEPS dense_tensor kernel_context kernel_factory
cc_library(linalg_cpu SRCS linalg.cc DEPS dense_tensor kernel_context kernel_factory)
cc_library(utils_cpu SRCS utils.cc DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
cc_library(manipulation_cpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory utils_cpu unary)
cc_library(scale_kernel_cpu SRCS scale_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
cc_library(full_kernel_cpu SRCS full_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
cc_library(conj_kernel_cpu SRCS conj_kernel.cc DEPS dense_tensor kernel_context kernel_factory)
1 change: 0 additions & 1 deletion paddle/pten/kernels/flatten_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
// limitations under the License.

#include "paddle/pten/kernels/flatten_kernel.h"
#include "paddle/pten/api/ext/dispatch.h"
#include "paddle/pten/backends/all_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/infermeta/unary.h"
Expand Down
4 changes: 0 additions & 4 deletions paddle/pten/kernels/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,11 @@ if(WITH_GPU)
nv_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
nv_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
nv_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary)
nv_library(scale_kernel_gpu SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
nv_library(full_kernel_gpu SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
nv_library(conj_kernel_gpu SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
elseif(WITH_ROCM)
hip_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu)
hip_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
hip_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
hip_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary)
hip_library(scale_kernel_gpu SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
hip_library(full_kernel_gpu SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
hip_library(conj_kernel_gpu SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
endif()
1 change: 1 addition & 0 deletions python/paddle/utils/code_gen/api_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ def source_include(header_file_path):
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/include/core.h"
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/declarations.h"
"""


Expand Down