diff --git a/cmake/Codegen.cmake b/cmake/Codegen.cmake
index 22a3f741f..47857a321 100644
--- a/cmake/Codegen.cmake
+++ b/cmake/Codegen.cmake
@@ -1,89 +1,95 @@
-if(Codegen_GPU_cmake_included)
+if(Codegen_XPU_cmake_included)
   return()
 endif()
-set(Codegen_GPU_cmake_included true)
+set(Codegen_XPU_cmake_included true)

-set(BUILD_TORCH_XPU_ATEN_GENERATED "${CMAKE_BINARY_DIR}/xpu/ATen/")
+set(BUILD_TORCH_XPU_ATEN_GENERATED "${CMAKE_BINARY_DIR}/xpu/ATen")
+set(BUILD_TORCH_ATEN_GENERATED "${CMAKE_BINARY_DIR}/aten/src/ATen")
 file(MAKE_DIRECTORY ${BUILD_TORCH_XPU_ATEN_GENERATED})

-set(RegisterXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterXPU_0.cpp)
-set(RegisterSparseXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseXPU_0.cpp)
-set(RegisterSparseCsrXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseCsrXPU_0.cpp)
-set(RegisterNestedTensorXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterNestedTensorXPU_0.cpp)
-set(XPUFallback_PATH ${TORCH_XPU_OPS_ROOT}/src/ATen/native/xpu/XPUFallback.template)
+set(RegisterXPU_GENERATED ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterXPU_0.cpp)
+set(RegisterSparseXPU_GENERATED ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseXPU_0.cpp)
+set(RegisterSparseCsrXPU_GENERATED ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseCsrXPU_0.cpp)
+set(RegisterNestedTensorXPU_GENERATED ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterNestedTensorXPU_0.cpp)
+set(XPUFallback_TEMPLATE ${TORCH_XPU_OPS_ROOT}/src/ATen/native/xpu/XPUFallback.template)
+set(XPU_AOTI_INSTALL_DIR ${TORCH_ROOT}/torch/csrc/inductor/aoti_torch/generated/extend)
+set(XPU_AOTI_SHIM_HEADER ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.h)
+set(XPU_AOTI_SHIM_SOURCE ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.cpp)

 if(WIN32)
   set(FILE_DISPLAY_CMD type)
   # replace forward slash with back slash for compatibility with 'type' command on Windows
-  string(REPLACE "/" "\\" RegisterXPU_PATH_BACKSLASH "${RegisterXPU_PATH}")
-  string(REPLACE "/" "\\" XPUFallback_PATH_BACKSLASH "${XPUFallback_PATH}")
-  set(REGISTER_FALLBACK_CMD ${FILE_DISPLAY_CMD} ${XPUFallback_PATH_BACKSLASH} ">>" ${RegisterXPU_PATH_BACKSLASH})
+  string(REPLACE "/" "\\" RegisterXPU_GENERATED_BACKSLASH "${RegisterXPU_GENERATED}")
+  string(REPLACE "/" "\\" XPUFallback_TEMPLATE_BACKSLASH "${XPUFallback_TEMPLATE}")
+  set(REGISTER_FALLBACK_CMD ${FILE_DISPLAY_CMD} ${XPUFallback_TEMPLATE_BACKSLASH} ">>" ${RegisterXPU_GENERATED_BACKSLASH})
 else()
   set(FILE_DISPLAY_CMD cat)
-  set(REGISTER_FALLBACK_CMD ${FILE_DISPLAY_CMD} ${XPUFallback_PATH} ">>" ${RegisterXPU_PATH})
+  set(REGISTER_FALLBACK_CMD ${FILE_DISPLAY_CMD} ${XPUFallback_TEMPLATE} ">>" ${RegisterXPU_GENERATED})
 endif()

-function(GEN_BACKEND file_yaml)
-  set(generated_files "")
-  foreach(f ${ARGN})
-    list(APPEND generated_files "${BUILD_TORCH_XPU_ATEN_GENERATED}/${f}")
-  endforeach()
-  file(GLOB_RECURSE depended_files ${TORCH_XPU_OPS_ROOT}/yaml/${file_yaml})
-  add_custom_command(
-    OUTPUT ${generated_files}
-    COMMAND
-    "${PYTHON_EXECUTABLE}" -m torchgen.gen_backend_stubs
-    --output_dir ${BUILD_TORCH_XPU_ATEN_GENERATED}
-    --source_yaml ${TORCH_XPU_OPS_ROOT}/yaml/${file_yaml}
-    COMMAND
-    ${REGISTER_FALLBACK_CMD}
-    ${SIMPLE_TRACE}
-    WORKING_DIRECTORY ${TORCH_ROOT}
-    DEPENDS
-    ${depended_files}
-    ${TORCH_XPU_OPS_ROOT}/yaml/${file_yaml}
-    ${XPUFallback_PATH}
-  )
-endfunction(GEN_BACKEND)
-
-
-set(RegisterXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterXPU_0.cpp)
-set(RegisterSparseXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseXPU_0.cpp)
-set(RegisterSparseCsrXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseCsrXPU_0.cpp)
-set(RegisterNestedTensorXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterNestedTensorXPU_0.cpp)
-set(XPUFallback_PATH ${TORCH_XPU_OPS_ROOT}/src/ATen/native/xpu/XPUFallback.template)
-set(XPU_AOTI_INSTALL_DIR ${TORCH_ROOT}/torch/csrc/inductor/aoti_torch/generated/extend)

 function(GEN_XPU file_yaml)
   set(generated_files "")
   foreach(f ${ARGN})
     list(APPEND generated_files "${f}")
   endforeach()
-  file(GLOB_RECURSE depend_files ${TORCH_XPU_OPS_ROOT}/yaml/${file_yaml})
-  set(CODEGEN_TEMPLATE ${TORCH_XPU_OPS_ROOT}/yaml/)
+  set(CODEGEN_XPU_YAML_DIR ${TORCH_XPU_OPS_ROOT}/yaml)

   # Codegen prepare process
   if(WIN32)
-    string(REPLACE "/" "\\" DestPATH "${CODEGEN_TEMPLATE}templates")
+    string(REPLACE "/" "\\" DestPATH "${CODEGEN_XPU_YAML_DIR}/templates")
     string(REPLACE "/" "\\" SrcPATH "${CMAKE_SOURCE_DIR}/aten/src/ATen/templates")
     execute_process(COMMAND cmd /c xcopy ${SrcPATH} ${DestPATH} /E /H /C /I /Y > nul)
-    string(REPLACE "/" "\\" RegisterXPU_PATH_BACKSLASH "${RegisterXPU_PATH}")
-    string(REPLACE "/" "\\" XPUFallback_PATH_BACKSLASH "${XPUFallback_PATH}")
-    set(REGISTER_FALLBACK_CMD ${FILE_DISPLAY_CMD} ${XPUFallback_PATH_BACKSLASH} ">>" ${RegisterXPU_PATH_BACKSLASH})
   else()
-    execute_process(COMMAND ln -s ${CMAKE_SOURCE_DIR}/aten/src/ATen/templates ${CODEGEN_TEMPLATE}) # soft link to pytorch templates
-    set(REGISTER_FALLBACK_CMD ${FILE_DISPLAY_CMD} ${XPUFallback_PATH} ">>" ${RegisterXPU_PATH})
+    execute_process(COMMAND ln -s ${CMAKE_SOURCE_DIR}/aten/src/ATen/templates ${CODEGEN_XPU_YAML_DIR}) # soft link to pytorch templates
   endif()
-  add_custom_command(
-    OUTPUT ${generated_files}
-    COMMAND
+
+  set(XPU_CODEGEN_COMMAND
     "${PYTHON_EXECUTABLE}" -m torchgen.gen
-    --source-path ${TORCH_XPU_OPS_ROOT}/yaml/
+    --source-path ${CODEGEN_XPU_YAML_DIR}
     --install-dir ${BUILD_TORCH_XPU_ATEN_GENERATED}
     --per-operator-headers
-    --static-dispatch-backend
     --backend-whitelist XPU SparseXPU SparseCsrXPU NestedTensorXPU
-    # --xpu: generate in-tree RegisterXPU_0.cpp for in-tree OPs
     --xpu
+  )
+
+  set(XPU_INSTALL_HEADER_COMMAND
+    "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/install_xpu_headers.py
+    --src-header-dir ${BUILD_TORCH_XPU_ATEN_GENERATED}
+    --dst-header-dir ${BUILD_TORCH_ATEN_GENERATED}
+  )
+
+  execute_process(
+    COMMAND
+      ${XPU_CODEGEN_COMMAND}
+      --generate headers
+      --dry-run
+      --output-dependencies ${BUILD_TORCH_XPU_ATEN_GENERATED}/generated_headers.cmake
+    RESULT_VARIABLE RETURN_VALUE
+    WORKING_DIRECTORY ${TORCH_ROOT}
+  )
+
+  if(NOT RETURN_VALUE EQUAL 0)
+    message(FATAL_ERROR "Failed to get generated_headers list")
+  endif()
+
+  execute_process(
+    COMMAND
+      ${XPU_INSTALL_HEADER_COMMAND}
+      --dry-run
+    RESULT_VARIABLE RETURN_VALUE
+    WORKING_DIRECTORY ${TORCH_ROOT}
+  )
+
+  if(NOT RETURN_VALUE EQUAL 0)
+    message(FATAL_ERROR "Failed to get XPU header list to install")
+  endif()
+
+  add_custom_command(
+    COMMENT "Generating XPU ATen Codegen..."
+    OUTPUT ${generated_files}
+    COMMAND
+    ${XPU_CODEGEN_COMMAND}
+    --static-dispatch-backend
     # --update-aoti-c-shim: generate extend/c_shim_xpu.h
     --update-aoti-c-shim
     # --exten-aoti-c-shim: specify the extend/c_shim_xpu
@@ -94,17 +100,14 @@ function(GEN_XPU file_yaml)
     --aoti-install-dir=${XPU_AOTI_INSTALL_DIR}
     COMMAND
     ${REGISTER_FALLBACK_CMD}
-    # Codegen post-process
-    COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterXPU_PATH}
-    COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterSparseXPU_PATH}
-    COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterSparseCsrXPU_PATH}
-    COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterNestedTensorXPU_PATH}
-    ${SIMPLE_TRACE}
+    # # Codegen post-process
+    COMMAND
+    ${XPU_INSTALL_HEADER_COMMAND}
     WORKING_DIRECTORY ${TORCH_ROOT}
     DEPENDS
-    ${depended_files}
-    ${TORCH_XPU_OPS_ROOT}/yaml/native/${file_yaml}
-    ${XPUFallback_PATH}
+    ${CODEGEN_XPU_YAML_DIR}/native/${file_yaml}
+    ${XPUFallback_TEMPLATE}
+    ${TORCH_XPU_OPS_ROOT}/tools/codegen/install_xpu_headers.py
   )

   # Post codegen delete the copied templates folder only on Windows.
@@ -118,30 +121,25 @@
   endif()
 endfunction(GEN_XPU)

-# GEN_BACKEND(
-#   xpu_functions.yaml
-#   XPUNativeFunctions.h
-#   RegisterXPU_0.cpp)
-
 GEN_XPU(
   native_functions.yaml
   ${BUILD_TORCH_XPU_ATEN_GENERATED}/XPUFunctions.h
-  ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterXPU_0.cpp
-  ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseXPU_0.cpp
-  ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseCsrXPU_0.cpp
-  ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterNestedTensorXPU_0.cpp
-  ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.h
-  ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.cpp
+  ${BUILD_TORCH_XPU_ATEN_GENERATED}/XPUFunctions_inl.h
+  ${RegisterXPU_GENERATED}
+  ${RegisterSparseXPU_GENERATED}
+  ${RegisterSparseCsrXPU_GENERATED}
+  ${RegisterNestedTensorXPU_GENERATED}
+  ${XPU_AOTI_SHIM_HEADER}
+  ${XPU_AOTI_SHIM_SOURCE}
 )

+include(${BUILD_TORCH_XPU_ATEN_GENERATED}/xpu_ops_generated_headers.cmake)

-# The c_shim_xpu.cpp needs include files in ${CMAKE_BINARY_DIR}/xpu/ATen/ops/*.h)
-# The include path is auto generated as "#include <ATen/ops/*.h>"
-# To follow the design of aoti codegen, here ${CMAKE_BINARY_DIR}/xpu is added to
-# $TORCH_XPU_OPS_INCLUDE_DIRS, so that "#include <ATen/ops/*.h>" works.
-list(APPEND TORCH_XPU_OPS_INCLUDE_DIRS ${CMAKE_BINARY_DIR}/xpu)
-
-list(APPEND xpu_generated_src ${RegisterXPU_PATH} ${RegisterSparseXPU_PATH} ${RegisterSparseCsrXPU_PATH} ${RegisterNestedTensorXPU_PATH})
-list(APPEND xpu_generated_src ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.cpp)
-add_custom_target(TORCH_XPU_GEN_TARGET DEPENDS ${xpu_generated_src})
+list(APPEND xpu_generated_src
+  ${RegisterXPU_GENERATED}
+  ${RegisterSparseXPU_GENERATED}
+  ${RegisterSparseCsrXPU_GENERATED}
+  ${RegisterNestedTensorXPU_GENERATED}
+  ${XPU_AOTI_SHIM_SOURCE}
+)
 set(ATen_XPU_GEN_SRCS ${xpu_generated_src})
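Note on the hunks above: torchgen is now invoked twice. At configure time, execute_process runs it with --generate headers --dry-run --output-dependencies, so the list of headers it would produce is known before the build starts; the real codegen runs later as a custom command, and the manifest is consumed via include(.../xpu_ops_generated_headers.cmake). Below is a minimal Python sketch of that dry-run contract for orientation only; the function, header name, and manifest layout are placeholders, not torchgen's actual implementation or output.

# Sketch of the "dry run reports, real run writes" contract (assumptions, not torchgen):
from pathlib import Path

def codegen(out_dir: Path, dry_run: bool = False) -> None:
    out_dir.mkdir(parents=True, exist_ok=True)
    headers = [out_dir / "ops" / "cat_xpu_dispatch.h"]  # stand-in for the real list
    if dry_run:
        # Emit a CMake-includable manifest, in the spirit of the
        # xpu_ops_generated_headers.cmake file include()d above.
        body = "".join(f'    "{h.as_posix()}"\n' for h in headers)
        (out_dir / "xpu_ops_generated_headers.cmake").write_text(
            "set(xpu_ops_generated_headers\n" + body + ")\n"
        )
        return
    for h in headers:  # real run: actually materialize the headers
        h.parent.mkdir(parents=True, exist_ok=True)
        h.write_text("// generated\n")

Knowing the output list at configure time is what lets the install rules in src/ATen/CMakeLists.txt (next hunk) iterate over ${xpu_ops_generated_headers} without running codegen first.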
-list(APPEND TORCH_XPU_OPS_INCLUDE_DIRS ${CMAKE_BINARY_DIR}/xpu) - -list(APPEND xpu_generated_src ${RegisterXPU_PATH} ${RegisterSparseXPU_PATH} ${RegisterSparseCsrXPU_PATH} ${RegisterNestedTensorXPU_PATH}) -list(APPEND xpu_generated_src ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.cpp) -add_custom_target(TORCH_XPU_GEN_TARGET DEPENDS ${xpu_generated_src}) +list(APPEND xpu_generated_src + ${RegisterXPU_GENERATED} + ${RegisterSparseXPU_GENERATED} + ${RegisterSparseCsrXPU_GENERATED} + ${RegisterNestedTensorXPU_GENERATED} + ${XPU_AOTI_SHIM_SOURCE} +) set(ATen_XPU_GEN_SRCS ${xpu_generated_src}) diff --git a/src/ATen/CMakeLists.txt b/src/ATen/CMakeLists.txt index 22e060111..ad936acb8 100644 --- a/src/ATen/CMakeLists.txt +++ b/src/ATen/CMakeLists.txt @@ -19,3 +19,7 @@ set(ATen_XPU_SYCL_SRCS ${ATen_XPU_SYCL_SRCS} PARENT_SCOPE) foreach(HEADER ${xpu_h}) install(FILES ${HEADER} DESTINATION "${AT_INSTALL_INCLUDE_DIR}/ATen/xpu") endforeach() + +foreach(HEADER ${xpu_ops_generated_headers}) + install(FILES ${HEADER} DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen/ops) +endforeach() diff --git a/src/ATen/native/sparse/xpu/SparseCsrTensorMath.cpp b/src/ATen/native/sparse/xpu/SparseCsrTensorMath.cpp index 38564914b..965de7b48 100644 --- a/src/ATen/native/sparse/xpu/SparseCsrTensorMath.cpp +++ b/src/ATen/native/sparse/xpu/SparseCsrTensorMath.cpp @@ -1,7 +1,7 @@ #include #include -#include -#include +#include +#include namespace at::native { diff --git a/src/ATen/native/xpu/Activation.cpp b/src/ATen/native/xpu/Activation.cpp index 87cac9c36..a19249c19 100644 --- a/src/ATen/native/xpu/Activation.cpp +++ b/src/ATen/native/xpu/Activation.cpp @@ -7,9 +7,9 @@ #include #include -#include -#include -#include +#include +#include +#include #include #include diff --git a/src/ATen/native/xpu/AdaptiveAveragePooling2d.cpp b/src/ATen/native/xpu/AdaptiveAveragePooling2d.cpp index 4a34e70d1..4b3efcebe 100644 --- a/src/ATen/native/xpu/AdaptiveAveragePooling2d.cpp +++ b/src/ATen/native/xpu/AdaptiveAveragePooling2d.cpp @@ -7,8 +7,8 @@ #include #include -#include -#include +#include +#include #include diff --git a/src/ATen/native/xpu/AdaptiveAveragePooling3d.cpp b/src/ATen/native/xpu/AdaptiveAveragePooling3d.cpp index 1a445b8ec..86ffe0f57 100644 --- a/src/ATen/native/xpu/AdaptiveAveragePooling3d.cpp +++ b/src/ATen/native/xpu/AdaptiveAveragePooling3d.cpp @@ -4,8 +4,8 @@ #include #include -#include -#include +#include +#include namespace at::native { diff --git a/src/ATen/native/xpu/AdaptiveMaxPooling2d.cpp b/src/ATen/native/xpu/AdaptiveMaxPooling2d.cpp index 6098072ac..c587cde35 100644 --- a/src/ATen/native/xpu/AdaptiveMaxPooling2d.cpp +++ b/src/ATen/native/xpu/AdaptiveMaxPooling2d.cpp @@ -4,8 +4,8 @@ #include #include -#include -#include +#include +#include namespace at { namespace native { diff --git a/src/ATen/native/xpu/AdaptiveMaxPooling3d.cpp b/src/ATen/native/xpu/AdaptiveMaxPooling3d.cpp index 7610dbd45..3bca6156b 100644 --- a/src/ATen/native/xpu/AdaptiveMaxPooling3d.cpp +++ b/src/ATen/native/xpu/AdaptiveMaxPooling3d.cpp @@ -4,8 +4,8 @@ #include #include -#include -#include +#include +#include namespace at { namespace native { diff --git a/src/ATen/native/xpu/AveragePool2d.cpp b/src/ATen/native/xpu/AveragePool2d.cpp index 326ad8a51..7647aa562 100644 --- a/src/ATen/native/xpu/AveragePool2d.cpp +++ b/src/ATen/native/xpu/AveragePool2d.cpp @@ -5,8 +5,8 @@ #include #include -#include -#include +#include +#include namespace at { namespace native { diff --git a/src/ATen/native/xpu/AveragePool3d.cpp b/src/ATen/native/xpu/AveragePool3d.cpp index 
diff --git a/src/ATen/native/xpu/AveragePool3d.cpp b/src/ATen/native/xpu/AveragePool3d.cpp
index 471e98a27..97eda5ae1 100644
--- a/src/ATen/native/xpu/AveragePool3d.cpp
+++ b/src/ATen/native/xpu/AveragePool3d.cpp
@@ -1,8 +1,8 @@
 #include
 #include
-#include
-#include
+#include
+#include

 namespace at {
 namespace native {
diff --git a/src/ATen/native/xpu/BinaryOps.cpp b/src/ATen/native/xpu/BinaryOps.cpp
index 53a8e56d2..18654eda1 100644
--- a/src/ATen/native/xpu/BinaryOps.cpp
+++ b/src/ATen/native/xpu/BinaryOps.cpp
@@ -4,7 +4,7 @@
 #include
 #include
-#include
+#include
 #include
 #include
diff --git a/src/ATen/native/xpu/Col2Im.cpp b/src/ATen/native/xpu/Col2Im.cpp
index 2a6742e5e..71c42fd6b 100644
--- a/src/ATen/native/xpu/Col2Im.cpp
+++ b/src/ATen/native/xpu/Col2Im.cpp
@@ -7,7 +7,7 @@
 #include
 #include
-#include
+#include

 namespace at::native {
diff --git a/src/ATen/native/xpu/DilatedMaxPool2d.cpp b/src/ATen/native/xpu/DilatedMaxPool2d.cpp
index a08227b47..c13e76bb5 100644
--- a/src/ATen/native/xpu/DilatedMaxPool2d.cpp
+++ b/src/ATen/native/xpu/DilatedMaxPool2d.cpp
@@ -4,9 +4,9 @@
 #include
 #include
-#include
-#include
-#include
+#include
+#include
+#include

 namespace at {
 namespace native {
diff --git a/src/ATen/native/xpu/DilatedMaxPool3d.cpp b/src/ATen/native/xpu/DilatedMaxPool3d.cpp
index f19e8c530..56d9ba0dc 100644
--- a/src/ATen/native/xpu/DilatedMaxPool3d.cpp
+++ b/src/ATen/native/xpu/DilatedMaxPool3d.cpp
@@ -2,8 +2,8 @@
 #include
 #include
-#include
-#include
+#include
+#include

 namespace at {
 namespace native {
diff --git a/src/ATen/native/xpu/Dropout.cpp b/src/ATen/native/xpu/Dropout.cpp
index bfb704e5f..5cc9ded92 100644
--- a/src/ATen/native/xpu/Dropout.cpp
+++ b/src/ATen/native/xpu/Dropout.cpp
@@ -3,8 +3,8 @@
 #include
 #include
-#include
-#include
+#include
+#include

 #include
diff --git a/src/ATen/native/xpu/Embedding.cpp b/src/ATen/native/xpu/Embedding.cpp
index 204a324fd..42b8cd67d 100644
--- a/src/ATen/native/xpu/Embedding.cpp
+++ b/src/ATen/native/xpu/Embedding.cpp
@@ -1,6 +1,6 @@
 #include
-#include
+#include
 #include
 #include
diff --git a/src/ATen/native/xpu/EmbeddingBag.cpp b/src/ATen/native/xpu/EmbeddingBag.cpp
index 25e9e8d1e..120370d6b 100644
--- a/src/ATen/native/xpu/EmbeddingBag.cpp
+++ b/src/ATen/native/xpu/EmbeddingBag.cpp
@@ -1,5 +1,5 @@
-#include
-#include
+#include
+#include
 #include
 #include
diff --git a/src/ATen/native/xpu/Equal.cpp b/src/ATen/native/xpu/Equal.cpp
index dcee9b380..bc9126843 100644
--- a/src/ATen/native/xpu/Equal.cpp
+++ b/src/ATen/native/xpu/Equal.cpp
@@ -1,6 +1,6 @@
 #include
-#include
+#include

 namespace at {
 namespace xpu {
diff --git a/src/ATen/native/xpu/ForeachOpScalarList.cpp b/src/ATen/native/xpu/ForeachOpScalarList.cpp
index 87c1f0ce3..2ec48cf0f 100644
--- a/src/ATen/native/xpu/ForeachOpScalarList.cpp
+++ b/src/ATen/native/xpu/ForeachOpScalarList.cpp
@@ -16,8 +16,8 @@
 #include
 #include
-#include
-#include
+#include
+#include

 namespace at {
 namespace native {
diff --git a/src/ATen/native/xpu/ForeachReduceOp.cpp b/src/ATen/native/xpu/ForeachReduceOp.cpp
index a9ef1ff44..6b104dda2 100644
--- a/src/ATen/native/xpu/ForeachReduceOp.cpp
+++ b/src/ATen/native/xpu/ForeachReduceOp.cpp
@@ -1,8 +1,8 @@
 #include
 #include
-#include
-#include
+#include
+#include

 namespace at {
 namespace native {
diff --git a/src/ATen/native/xpu/FractionalMaxPool2d.cpp b/src/ATen/native/xpu/FractionalMaxPool2d.cpp
index e0ddea64b..2586fe17c 100644
--- a/src/ATen/native/xpu/FractionalMaxPool2d.cpp
+++ b/src/ATen/native/xpu/FractionalMaxPool2d.cpp
@@ -3,8 +3,8 @@
 #include
 #include
-#include
-#include
+#include
+#include

 namespace at::native {
diff --git a/src/ATen/native/xpu/FractionalMaxPool3d.cpp b/src/ATen/native/xpu/FractionalMaxPool3d.cpp
index 29d6acf8c..a0ac54b1b 100644
--- a/src/ATen/native/xpu/FractionalMaxPool3d.cpp
+++ b/src/ATen/native/xpu/FractionalMaxPool3d.cpp
@@ -4,8 +4,8 @@
 #include
 #include
-#include
-#include
+#include
+#include

 namespace at::native {
diff --git a/src/ATen/native/xpu/Im2Col.cpp b/src/ATen/native/xpu/Im2Col.cpp
index eb9f4077a..200b56831 100644
--- a/src/ATen/native/xpu/Im2Col.cpp
+++ b/src/ATen/native/xpu/Im2Col.cpp
@@ -4,7 +4,7 @@
 #include
 #include
-#include
+#include
 #include
 #include
diff --git a/src/ATen/native/xpu/Indexing.cpp b/src/ATen/native/xpu/Indexing.cpp
index bb8c07a92..fe4dc79fc 100644
--- a/src/ATen/native/xpu/Indexing.cpp
+++ b/src/ATen/native/xpu/Indexing.cpp
@@ -10,7 +10,7 @@
 #include
 #include
-#include
+#include

 namespace at {
 namespace native {
diff --git a/src/ATen/native/xpu/LossMultiMargin.cpp b/src/ATen/native/xpu/LossMultiMargin.cpp
index 2db427135..6f8076a0f 100644
--- a/src/ATen/native/xpu/LossMultiMargin.cpp
+++ b/src/ATen/native/xpu/LossMultiMargin.cpp
@@ -2,8 +2,8 @@
 #include
 #include
-#include
-#include
+#include
+#include

 namespace at::native {
diff --git a/src/ATen/native/xpu/LossNLL.cpp b/src/ATen/native/xpu/LossNLL.cpp
index d80fef746..28cceca99 100644
--- a/src/ATen/native/xpu/LossNLL.cpp
+++ b/src/ATen/native/xpu/LossNLL.cpp
@@ -5,8 +5,8 @@
 #include
 #include
-#include
-#include
+#include
+#include

 namespace at {
 namespace native {
diff --git a/src/ATen/native/xpu/PinnedMemoryAllocator.cpp b/src/ATen/native/xpu/PinnedMemoryAllocator.cpp
index a12b686b2..88c9e46c2 100644
--- a/src/ATen/native/xpu/PinnedMemoryAllocator.cpp
+++ b/src/ATen/native/xpu/PinnedMemoryAllocator.cpp
@@ -3,7 +3,7 @@
 #include
 #include
-#include
+#include

 namespace at {
 namespace native {
diff --git a/src/ATen/native/xpu/RangeFactories.cpp b/src/ATen/native/xpu/RangeFactories.cpp
index bfa0f1545..4cbed8c73 100644
--- a/src/ATen/native/xpu/RangeFactories.cpp
+++ b/src/ATen/native/xpu/RangeFactories.cpp
@@ -10,10 +10,10 @@
 #include
 #include
-#include
-#include
-#include
-#include
+#include
+#include
+#include
+#include

 namespace at {
diff --git a/src/ATen/native/xpu/ReflectionPad.cpp b/src/ATen/native/xpu/ReflectionPad.cpp
index a88151914..0c9ee7da4 100644
--- a/src/ATen/native/xpu/ReflectionPad.cpp
+++ b/src/ATen/native/xpu/ReflectionPad.cpp
@@ -6,12 +6,12 @@
 #include
 #include
-#include
-#include
-#include
-#include
-#include
-#include
+#include
+#include
+#include
+#include
+#include
+#include
 #include "ATen/TensorMeta.h"

 namespace at {
diff --git a/src/ATen/native/xpu/ReplicationPadding.cpp b/src/ATen/native/xpu/ReplicationPadding.cpp
index 3f0093845..e72ff0a4c 100644
--- a/src/ATen/native/xpu/ReplicationPadding.cpp
+++ b/src/ATen/native/xpu/ReplicationPadding.cpp
@@ -6,12 +6,12 @@
 #include

-#include
-#include
-#include
-#include
-#include
-#include
+#include
+#include
+#include
+#include
+#include
+#include

 namespace at {
 namespace native {
diff --git a/src/ATen/native/xpu/Resize.cpp b/src/ATen/native/xpu/Resize.cpp
index 66c95302b..19b37ff0c 100644
--- a/src/ATen/native/xpu/Resize.cpp
+++ b/src/ATen/native/xpu/Resize.cpp
@@ -6,20 +6,13 @@
 #include
 #include
-#include
-#include
-#include
+#include
+#include
+#include
 #include

 namespace at {
-
-namespace native {
-const at::Tensor& resize_(
-    const at::Tensor& self,
-    at::IntArrayRef size,
-    ::std::optional<at::MemoryFormat> memory_format = ::std::nullopt);
-}
 namespace native::xpu {

 const Tensor& resize_xpu_(
diff --git a/src/ATen/native/xpu/SoftMax.cpp b/src/ATen/native/xpu/SoftMax.cpp
index f155165ce..052a3d9cf 100644
--- a/src/ATen/native/xpu/SoftMax.cpp
+++ b/src/ATen/native/xpu/SoftMax.cpp
@@ -5,10 +5,10 @@
 #include
 #include
-#include
-#include
-#include
-#include
+#include
+#include
+#include
+#include

 namespace at::native {
 TORCH_IMPL_FUNC(softmax_xpu_out)
diff --git a/src/ATen/native/xpu/SummaryOps.cpp b/src/ATen/native/xpu/SummaryOps.cpp
index 953004227..22a52e4e2 100644
--- a/src/ATen/native/xpu/SummaryOps.cpp
+++ b/src/ATen/native/xpu/SummaryOps.cpp
@@ -2,7 +2,7 @@
 #include
 #include
-#include
+#include

 namespace at {
 namespace native {
diff --git a/src/ATen/native/xpu/TensorAdvancedIndexing.cpp b/src/ATen/native/xpu/TensorAdvancedIndexing.cpp
index bd24aa3a0..a1ce1fad5 100644
--- a/src/ATen/native/xpu/TensorAdvancedIndexing.cpp
+++ b/src/ATen/native/xpu/TensorAdvancedIndexing.cpp
@@ -22,9 +22,9 @@
 #include
 #include
-#include
-#include //generated
-//#include //generated
+#include
+#include //generated
+//#include //generated

 namespace at {
diff --git a/src/ATen/native/xpu/TensorFactories.cpp b/src/ATen/native/xpu/TensorFactories.cpp
index 3caef39ba..2c0faa535 100644
--- a/src/ATen/native/xpu/TensorFactories.cpp
+++ b/src/ATen/native/xpu/TensorFactories.cpp
@@ -5,7 +5,7 @@
 #include
 #include
-#include
+#include
 #include
 #include
diff --git a/src/ATen/native/xpu/TensorShape.cpp b/src/ATen/native/xpu/TensorShape.cpp
index b237b4336..aae14c1b6 100644
--- a/src/ATen/native/xpu/TensorShape.cpp
+++ b/src/ATen/native/xpu/TensorShape.cpp
@@ -9,9 +9,9 @@
 #include
 #include
 #include
-#include
-#include
-#include
+#include
+#include
+#include

 namespace at {
diff --git a/src/ATen/native/xpu/TensorTopK.cpp b/src/ATen/native/xpu/TensorTopK.cpp
index ab3fc5250..0c79610cd 100644
--- a/src/ATen/native/xpu/TensorTopK.cpp
+++ b/src/ATen/native/xpu/TensorTopK.cpp
@@ -5,7 +5,7 @@
 #include

-#include
+#include

 namespace at {
diff --git a/src/ATen/native/xpu/TriangluarOps.cpp b/src/ATen/native/xpu/TriangluarOps.cpp
index 3db5e967b..39213b00f 100644
--- a/src/ATen/native/xpu/TriangluarOps.cpp
+++ b/src/ATen/native/xpu/TriangluarOps.cpp
@@ -5,8 +5,8 @@
 #include
 #include
-#include
-#include
+#include
+#include

 namespace at::native {
diff --git a/src/ATen/native/xpu/UpSampleBicubic2d.cpp b/src/ATen/native/xpu/UpSampleBicubic2d.cpp
index 7e0e4de40..388c6d0e4 100644
--- a/src/ATen/native/xpu/UpSampleBicubic2d.cpp
+++ b/src/ATen/native/xpu/UpSampleBicubic2d.cpp
@@ -5,10 +5,10 @@
 #include
 #include
-#include
-#include
-#include
-#include
+#include
+#include
+#include
+#include

 namespace at {
 namespace native {
 TORCH_IMPL_FUNC(upsample_bicubic2d_out_xpu)
diff --git a/src/ATen/native/xpu/UpSampleBilinear2d.cpp b/src/ATen/native/xpu/UpSampleBilinear2d.cpp
index aec707193..91bc5219b 100644
--- a/src/ATen/native/xpu/UpSampleBilinear2d.cpp
+++ b/src/ATen/native/xpu/UpSampleBilinear2d.cpp
@@ -4,10 +4,10 @@
 #include
 #include
-#include
-#include
-#include
-#include
+#include
+#include
+#include
+#include

 namespace at {
 namespace native {
diff --git a/src/ATen/native/xpu/UpSampleLinear1d.cpp b/src/ATen/native/xpu/UpSampleLinear1d.cpp
index 13dfa33de..388f6c257 100644
--- a/src/ATen/native/xpu/UpSampleLinear1d.cpp
+++ b/src/ATen/native/xpu/UpSampleLinear1d.cpp
@@ -5,8 +5,8 @@
 #include
 #include "ATen/core/ATen_fwd.h"
-#include
-#include
+#include
+#include

 namespace at {
 namespace native {
diff --git a/src/ATen/native/xpu/UpSampleNearest1d.cpp b/src/ATen/native/xpu/UpSampleNearest1d.cpp
index 30287e4b2..7603a43e9 100644
--- a/src/ATen/native/xpu/UpSampleNearest1d.cpp
+++ b/src/ATen/native/xpu/UpSampleNearest1d.cpp
@@ -2,10 +2,10 @@
 #include
 #include
-#include
-#include
-#include
-#include
+#include
+#include
+#include
+#include

 namespace at {
 namespace native {
diff --git a/src/ATen/native/xpu/UpSampleNearest2d.cpp b/src/ATen/native/xpu/UpSampleNearest2d.cpp
index 9ebbd74b1..c906a703f 100644
--- a/src/ATen/native/xpu/UpSampleNearest2d.cpp
+++ b/src/ATen/native/xpu/UpSampleNearest2d.cpp
@@ -2,10 +2,10 @@
 #include
 #include
-#include
-#include
-#include
-#include
+#include
+#include
+#include
+#include

 namespace at {
 namespace native {
diff --git a/src/ATen/native/xpu/UpSampleNearest3d.cpp b/src/ATen/native/xpu/UpSampleNearest3d.cpp
index 5528b0ac2..8cc0bb9f8 100644
--- a/src/ATen/native/xpu/UpSampleNearest3d.cpp
+++ b/src/ATen/native/xpu/UpSampleNearest3d.cpp
@@ -1,14 +1,14 @@
 #include
 #include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include

 namespace at::native {
diff --git a/src/ATen/native/xpu/UpSampleTrilinear3d.cpp b/src/ATen/native/xpu/UpSampleTrilinear3d.cpp
index 4c46a07c6..e6a28ca84 100644
--- a/src/ATen/native/xpu/UpSampleTrilinear3d.cpp
+++ b/src/ATen/native/xpu/UpSampleTrilinear3d.cpp
@@ -2,8 +2,8 @@
 #include
 #include
-#include
-#include
+#include
+#include

 namespace at {
 namespace native {
diff --git a/src/ATen/native/xpu/XPUScalar.cpp b/src/ATen/native/xpu/XPUScalar.cpp
index d47dd7871..25acb44d1 100644
--- a/src/ATen/native/xpu/XPUScalar.cpp
+++ b/src/ATen/native/xpu/XPUScalar.cpp
@@ -3,7 +3,7 @@
 #include
 #include
 #include
-#include
+#include

 namespace at::native {
diff --git a/test/regressions/test_xpu_ops_header.py b/test/regressions/test_xpu_ops_header.py
new file mode 100644
index 000000000..844c9b25a
--- /dev/null
+++ b/test/regressions/test_xpu_ops_header.py
@@ -0,0 +1,21 @@
+# Owner(s): ["module: intel"]
+import os
+
+import torch
+from torch.testing._internal.common_utils import TestCase
+
+
+class TestXpuOpsHeader(TestCase):
+    def test_xpu_ops_header(self):
+        include_dir = os.path.join(os.path.dirname(torch.__file__), "include")
+        aten_ops_dir = os.path.join(include_dir, "ATen/ops")
+        self.assertTrue(
+            os.path.exists(os.path.join(aten_ops_dir, "cat_xpu_dispatch.h"))
+        )
+        self.assertTrue(
+            os.path.exists(os.path.join(aten_ops_dir, "index_fill_xpu_dispatch.h"))
+        )
+        self.assertTrue(os.path.exists(os.path.join(aten_ops_dir, "col2im_native.h")))
+        with open(os.path.join(aten_ops_dir, "col2im_native.h")) as fr:
+            text = fr.read()
+        self.assertTrue("col2im_xpu" in text)
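The regression test above keys on three representative generated headers landing in the installed include tree. The same spot-check can be run by hand from a Python prompt, assuming an importable XPU-enabled torch build; the paths below mirror the test itself, nothing else is assumed:

# Manual spot-check mirroring test_xpu_ops_header (assumes torch is importable):
import os
import torch

aten_ops_dir = os.path.join(os.path.dirname(torch.__file__), "include", "ATen", "ops")
for name in ("cat_xpu_dispatch.h", "index_fill_xpu_dispatch.h", "col2im_native.h"):
    # Each of these should exist once the install_xpu_headers step has run.
    print(name, os.path.exists(os.path.join(aten_ops_dir, name)))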
+ """ + if args.dry_run: + return + + with open(dst) as fr: + lines = fr.readlines() + while lines and lines[-1].strip() == "": + lines.pop() + with open(dst, "w") as fw: + fw.writelines(lines) + + with open(src) as fr, open(dst, "a") as fa: + src_lines = fr.readlines() + for line in src_lines: + if re.match(r"^#include " not in ln: - fw.write(ln) - -def replace_op_headers(): - with open(args.register_xpu_path) as fr: - lines = fr.readlines() - patt = r'#include