From 2b4876157562bc76e86f193d371348993905bc61 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 14 Dec 2021 17:55:25 -0500 Subject: [PATCH] AMDGPU: Remove AMDGPUFixFunctionBitcasts pass This was a workaround for not supporting indirect calls when instcombine didn't eliminate constant expression casts of the callee at -O0. Indirect calls are supposed to work now, so drop the hack. --- llvm/lib/Target/AMDGPU/AMDGPU.h | 4 -- .../AMDGPU/AMDGPUFixFunctionBitcasts.cpp | 64 ------------------- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 5 -- llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 - llvm/test/CodeGen/AMDGPU/call-constexpr.ll | 29 +-------- llvm/test/CodeGen/AMDGPU/unsupported-calls.ll | 2 +- 6 files changed, 2 insertions(+), 103 deletions(-) delete mode 100644 llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 958e8c9e5bc54e..910a1aafc2f88c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -90,10 +90,6 @@ ModulePass *createAMDGPULowerIntrinsicsPass(); void initializeAMDGPULowerIntrinsicsPass(PassRegistry &); extern char &AMDGPULowerIntrinsicsID; -ModulePass *createAMDGPUFixFunctionBitcastsPass(); -void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &); -extern char &AMDGPUFixFunctionBitcastsID; - ModulePass *createAMDGPUCtorDtorLoweringPass(); void initializeAMDGPUCtorDtorLoweringPass(PassRegistry &); extern char &AMDGPUCtorDtorLoweringID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp b/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp deleted file mode 100644 index ea6c6d0fd212b5..00000000000000 --- a/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp +++ /dev/null @@ -1,64 +0,0 @@ -//===-- AMDGPUFixFunctionBitcasts.cpp - Fix function bitcasts -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Promote indirect (bitcast) calls to direct calls when they are statically -/// known to be direct. Required when InstCombine is not run (e.g. at OptNone) -/// because AMDGPU does not support indirect calls. -/// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "llvm/IR/InstVisitor.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Utils/CallPromotionUtils.h" - -using namespace llvm; - -#define DEBUG_TYPE "amdgpu-fix-function-bitcasts" - -namespace { -class AMDGPUFixFunctionBitcasts final - : public ModulePass, - public InstVisitor<AMDGPUFixFunctionBitcasts> { - - bool runOnModule(Module &M) override; - - bool Modified; - -public: - void visitCallBase(CallBase &CB) { - if (CB.getCalledFunction()) - return; - auto *Callee = - dyn_cast<Function>(CB.getCalledOperand()->stripPointerCasts()); - if (Callee && isLegalToPromote(CB, Callee)) { - promoteCall(CB, Callee); - Modified = true; - } - } - - static char ID; - AMDGPUFixFunctionBitcasts() : ModulePass(ID) {} -}; -} // End anonymous namespace - -char AMDGPUFixFunctionBitcasts::ID = 0; -char &llvm::AMDGPUFixFunctionBitcastsID = AMDGPUFixFunctionBitcasts::ID; -INITIALIZE_PASS(AMDGPUFixFunctionBitcasts, DEBUG_TYPE, - "Fix function bitcasts for AMDGPU", false, false) - -ModulePass *llvm::createAMDGPUFixFunctionBitcastsPass() { - return new AMDGPUFixFunctionBitcasts(); -} - -bool AMDGPUFixFunctionBitcasts::runOnModule(Module &M) { - Modified = false; - visit(M); - return Modified; -} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index a2c61f9da8daea..c2acc20bfe256a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -330,7 +330,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { 
initializeSIOptimizeExecMaskingPreRAPass(*PR); initializeSIOptimizeVGPRLiveRangePass(*PR); initializeSILoadStoreOptimizerPass(*PR); - initializeAMDGPUFixFunctionBitcastsPass(*PR); initializeAMDGPUCtorDtorLoweringPass(*PR); initializeAMDGPUAlwaysInlinePass(*PR); initializeAMDGPUAttributorPass(*PR); @@ -953,10 +952,6 @@ void AMDGPUPassConfig::addIRPasses() { addPass(createAMDGPUPrintfRuntimeBinding()); addPass(createAMDGPUCtorDtorLoweringPass()); - // This must occur before inlining, as the inliner will not look through - // bitcast calls. - addPass(createAMDGPUFixFunctionBitcastsPass()); - // A call to propagate attributes pass in the backend in case opt was not run. addPass(createAMDGPUPropagateAttributesEarlyPass(&TM)); diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index ca5208355db960..2be40d8901440f 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -54,7 +54,6 @@ add_llvm_target(AMDGPUCodeGen AMDGPUCombinerHelper.cpp AMDGPUCtorDtorLowering.cpp AMDGPUExportClustering.cpp - AMDGPUFixFunctionBitcasts.cpp AMDGPUFrameLowering.cpp AMDGPUGlobalISelUtils.cpp AMDGPUHSAMetadataStreamer.cpp diff --git a/llvm/test/CodeGen/AMDGPU/call-constexpr.ll b/llvm/test/CodeGen/AMDGPU/call-constexpr.ll index cd4ba516ee2f6e..6de7928167a3c0 100644 --- a/llvm/test/CodeGen/AMDGPU/call-constexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/call-constexpr.ll @@ -1,14 +1,10 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-fix-function-bitcasts < %s | FileCheck -check-prefix=OPT %s ; GCN-LABEL: {{^}}test_bitcast_return_type_noinline: ; GCN: s_getpc_b64 ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_noinline@rel32@lo+4 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_noinline@rel32@hi+12 ; GCN: s_swappc_b64 -; OPT-LABEL: @test_bitcast_return_type_noinline( -; OPT: %val = call i32 @ret_i32_noinline() -; OPT: 
bitcast i32 %val to float define amdgpu_kernel void @test_bitcast_return_type_noinline() #0 { %val = call float bitcast (i32()* @ret_i32_noinline to float()*)() %op = fadd float %val, 1.0 @@ -17,13 +13,7 @@ define amdgpu_kernel void @test_bitcast_return_type_noinline() #0 { } ; GCN-LABEL: {{^}}test_bitcast_return_type_alwaysinline: -; GCN-NOT: s_getpc_b64 -; GCN-NOT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_alwaysinline@rel32@lo+4 -; GCN-NOT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_alwaysinline@rel32@hi+12 -; GCN-NOT: s_swappc_b64 -; OPT-LABEL: @test_bitcast_return_type_alwaysinline( -; OPT: %val = call i32 @ret_i32_alwaysinline() -; OPT: bitcast i32 %val to float +; GCN: s_swappc_b64 define amdgpu_kernel void @test_bitcast_return_type_alwaysinline() #0 { %val = call float bitcast (i32()* @ret_i32_alwaysinline to float()*)() %op = fadd float %val, 1.0 @@ -36,10 +26,6 @@ define amdgpu_kernel void @test_bitcast_return_type_alwaysinline() #0 { ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12 ; GCN: s_swappc_b64 -; OPT-LABEL: @test_bitcast_argument_type( -; OPT: %1 = bitcast float 2.000000e+00 to i32 -; OPT: %val = call i32 @ident_i32(i32 %1) -; OPT-NOT: bitcast i32 %val to float define amdgpu_kernel void @test_bitcast_argument_type() #0 { %val = call i32 bitcast (i32(i32)* @ident_i32 to i32(float)*)(float 2.0) %op = add i32 %val, 1 @@ -52,10 +38,6 @@ define amdgpu_kernel void @test_bitcast_argument_type() #0 { ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12 ; GCN: s_swappc_b64 -; OPT-LABEL: @test_bitcast_argument_and_return_types( -; OPT: %1 = bitcast float 2.000000e+00 to i32 -; OPT: %val = call i32 @ident_i32(i32 %1) -; OPT: bitcast i32 %val to float define amdgpu_kernel void @test_bitcast_argument_and_return_types() #0 { %val = call float bitcast (i32(i32)* @ident_i32 to float(float)*)(float 2.0) 
%op = fadd float %val, 1.0 @@ -82,9 +64,6 @@ define hidden i32 @use_workitem_id_x(i32 %arg0) #0 { ; GCN: v_mov_b32_e32 v0, 9 ; GCN: s_swappc_b64 ; GCN: v_add_f32_e32 -; OPT-LABEL: @use_workitem_id_x( -; OPT: %val = call i32 @use_workitem_id_x(i32 9) -; OPT: bitcast i32 %val to float define amdgpu_kernel void @test_bitcast_use_workitem_id_x() #0 { %val = call float bitcast (i32(i32)* @use_workitem_id_x to float(i32)*)(i32 9) %op = fadd float %val, 1.0 @@ -97,12 +76,6 @@ define amdgpu_kernel void @test_bitcast_use_workitem_id_x() #0 { ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12 ; GCN: s_swappc_b64 -; OPT-LABEL: @test_invoke( -; OPT: %1 = bitcast float 2.000000e+00 to i32 -; OPT: %val = invoke i32 @ident_i32(i32 %1) -; OPT-NEXT: to label %continue.split unwind label %broken -; OPT-LABEL: continue.split: -; OPT: bitcast i32 %val to float @_ZTIi = external global i8* declare i32 @__gxx_personality_v0(...) define amdgpu_kernel void @test_invoke() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll b/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll index a50dccc757be09..eeb54f927aaf5f 100644 --- a/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll @@ -54,7 +54,7 @@ define void @test_call_varargs() { declare i32 @extern_variadic(...) -; GCN: in function test_tail_call_bitcast_extern_variadic{{.*}}: unsupported call to variadic function extern_variadic +; GCN: in function test_tail_call_bitcast_extern_variadic{{.*}}: unsupported required tail call to function extern_variadic ; R600: in function test_tail_call_bitcast_extern_variadic{{.*}}: unsupported call to function extern_variadic define i32 @test_tail_call_bitcast_extern_variadic(<4 x float> %arg0, <4 x float> %arg1, i32 %arg2) { %add = fadd <4 x float> %arg0, %arg1