From d22ca717450b8bdf17c900fffc0a88c18359c9ca Mon Sep 17 00:00:00 2001 From: minhthuc Date: Mon, 8 Apr 2024 14:32:00 +0200 Subject: [PATCH] optimize compilation --- include/ctranslate2/ops/flash-attention/softmax.h | 4 ++-- python/tools/prepare_build_environment_windows.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/ctranslate2/ops/flash-attention/softmax.h b/include/ctranslate2/ops/flash-attention/softmax.h index 3558459b8..0af500c56 100644 --- a/include/ctranslate2/ops/flash-attention/softmax.h +++ b/include/ctranslate2/ops/flash-attention/softmax.h @@ -162,7 +162,7 @@ struct Softmax { } }; - template + template __forceinline__ __device__ TensorT normalize_softmax_lse(Tensor0 &acc_o, float softmax_scale, float rp_dropout=1.0) { SumOp sum_op; quad_allreduce_(row_sum, row_sum, sum_op); @@ -174,7 +174,7 @@ struct Softmax { float sum = row_sum(mi); float inv_sum = (sum == 0.f || sum != sum) ? 1.f : 1.f / sum; lse(mi) = (sum == 0.f || sum != sum) ? (Split ? -INFINITY : INFINITY) : row_max(mi) * softmax_scale + __logf(sum); - float scale = !Is_dropout ? inv_sum : inv_sum * rp_dropout; + float scale = inv_sum; #pragma unroll for (int ni = 0; ni < size<1>(acc_o_rowcol); ++ni) { acc_o_rowcol(mi, ni) *= scale; } } diff --git a/python/tools/prepare_build_environment_windows.sh b/python/tools/prepare_build_environment_windows.sh index b5fe03ecc..1a4d76eeb 100755 --- a/python/tools/prepare_build_environment_windows.sh +++ b/python/tools/prepare_build_environment_windows.sh @@ -26,14 +26,14 @@ curl -L -O https://github.com/oneapi-src/oneDNN/archive/refs/tags/v${ONEDNN_VERS tar xf *.tar.gz && rm *.tar.gz cd oneDNN-* cmake -DCMAKE_BUILD_TYPE=Release -DONEDNN_LIBRARY_TYPE=STATIC -DONEDNN_BUILD_EXAMPLES=OFF -DONEDNN_BUILD_TESTS=OFF -DONEDNN_ENABLE_WORKLOAD=INFERENCE -DONEDNN_ENABLE_PRIMITIVE="CONVOLUTION;REORDER" -DONEDNN_BUILD_GRAPH=OFF . -cmake --build . --config Release --target install --parallel 2 +cmake --build . --config Release --target install --parallel 6 cd .. rm -r oneDNN-* mkdir build cd build cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$CTRANSLATE2_ROOT -DCMAKE_PREFIX_PATH="C:/Program Files (x86)/Intel/oneAPI/compiler/latest/windows/compiler/lib/intel64_win;C:/Program Files (x86)/oneDNN" -DBUILD_CLI=OFF -DWITH_DNNL=ON -DWITH_CUDA=ON -DWITH_CUDNN=ON -DCUDA_TOOLKIT_ROOT_DIR="$CUDA_ROOT" -DCUDA_DYNAMIC_LOADING=ON -DCUDA_NVCC_FLAGS="-Xfatbin=-compress-all" -DCUDA_ARCH_LIST="Common" .. -cmake --build . --config Release --target install --parallel 2 --verbose +cmake --build . --config Release --target install --parallel 6 --verbose cd .. rm -r build