Index: llvm/lib/Target/AMDGPU/AMDGPU.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPU.h
+++ llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -90,10 +90,6 @@
 void initializeAMDGPULowerIntrinsicsPass(PassRegistry &);
 extern char &AMDGPULowerIntrinsicsID;
 
-ModulePass *createAMDGPUFixFunctionBitcastsPass();
-void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &);
-extern char &AMDGPUFixFunctionBitcastsID;
-
 ModulePass *createAMDGPUCtorDtorLoweringPass();
 void initializeAMDGPUCtorDtorLoweringPass(PassRegistry &);
 extern char &AMDGPUCtorDtorLoweringID;
Index: llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-//===-- AMDGPUFixFunctionBitcasts.cpp - Fix function bitcasts -------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Promote indirect (bitcast) calls to direct calls when they are statically
-/// known to be direct. Required when InstCombine is not run (e.g. at OptNone)
-/// because AMDGPU does not support indirect calls.
-///
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "llvm/IR/InstVisitor.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils/CallPromotionUtils.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "amdgpu-fix-function-bitcasts"
-
-namespace {
-class AMDGPUFixFunctionBitcasts final
-    : public ModulePass,
-      public InstVisitor<AMDGPUFixFunctionBitcasts> {
-
-  bool runOnModule(Module &M) override;
-
-  bool Modified;
-
-public:
-  void visitCallBase(CallBase &CB) {
-    if (CB.getCalledFunction())
-      return;
-    auto *Callee =
-        dyn_cast<Function>(CB.getCalledOperand()->stripPointerCasts());
-    if (Callee && isLegalToPromote(CB, Callee)) {
-      promoteCall(CB, Callee);
-      Modified = true;
-    }
-  }
-
-  static char ID;
-  AMDGPUFixFunctionBitcasts() : ModulePass(ID) {}
-};
-} // End anonymous namespace
-
-char AMDGPUFixFunctionBitcasts::ID = 0;
-char &llvm::AMDGPUFixFunctionBitcastsID = AMDGPUFixFunctionBitcasts::ID;
-INITIALIZE_PASS(AMDGPUFixFunctionBitcasts, DEBUG_TYPE,
-                "Fix function bitcasts for AMDGPU", false, false)
-
-ModulePass *llvm::createAMDGPUFixFunctionBitcastsPass() {
-  return new AMDGPUFixFunctionBitcasts();
-}
-
-bool AMDGPUFixFunctionBitcasts::runOnModule(Module &M) {
-  Modified = false;
-  visit(M);
-  return Modified;
-}
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -330,7 +330,6 @@
   initializeSIOptimizeExecMaskingPreRAPass(*PR);
   initializeSIOptimizeVGPRLiveRangePass(*PR);
   initializeSILoadStoreOptimizerPass(*PR);
-  initializeAMDGPUFixFunctionBitcastsPass(*PR);
   initializeAMDGPUCtorDtorLoweringPass(*PR);
   initializeAMDGPUAlwaysInlinePass(*PR);
   initializeAMDGPUAttributorPass(*PR);
@@ -953,10 +952,6 @@
   addPass(createAMDGPUPrintfRuntimeBinding());
   addPass(createAMDGPUCtorDtorLoweringPass());
 
-  // This must occur before inlining, as the inliner will not look through
-  // bitcast calls.
-  addPass(createAMDGPUFixFunctionBitcastsPass());
-
   // A call to propagate attributes pass in the backend in case opt was not run.
   addPass(createAMDGPUPropagateAttributesEarlyPass(&TM));
 
Index: llvm/lib/Target/AMDGPU/CMakeLists.txt
===================================================================
--- llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -54,7 +54,6 @@
   AMDGPUCombinerHelper.cpp
   AMDGPUCtorDtorLowering.cpp
   AMDGPUExportClustering.cpp
-  AMDGPUFixFunctionBitcasts.cpp
   AMDGPUFrameLowering.cpp
   AMDGPUGlobalISelUtils.cpp
   AMDGPUHSAMetadataStreamer.cpp
Index: llvm/test/CodeGen/AMDGPU/call-constexpr.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/call-constexpr.ll
+++ llvm/test/CodeGen/AMDGPU/call-constexpr.ll
@@ -1,14 +1,10 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-fix-function-bitcasts < %s | FileCheck -check-prefix=OPT %s
 
 ; GCN-LABEL: {{^}}test_bitcast_return_type_noinline:
 ; GCN: s_getpc_b64
 ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_noinline@rel32@lo+4
 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_noinline@rel32@hi+12
 ; GCN: s_swappc_b64
-; OPT-LABEL: @test_bitcast_return_type_noinline(
-; OPT: %val = call i32 @ret_i32_noinline()
-; OPT: bitcast i32 %val to float
 define amdgpu_kernel void @test_bitcast_return_type_noinline() #0 {
   %val = call float bitcast (i32()* @ret_i32_noinline to float()*)()
   %op = fadd float %val, 1.0
@@ -17,13 +13,7 @@
 }
 
 ; GCN-LABEL: {{^}}test_bitcast_return_type_alwaysinline:
-; GCN-NOT: s_getpc_b64
-; GCN-NOT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_alwaysinline@rel32@lo+4
-; GCN-NOT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_alwaysinline@rel32@hi+12
-; GCN-NOT: s_swappc_b64
-; OPT-LABEL: @test_bitcast_return_type_alwaysinline(
-; OPT: %val = call i32 @ret_i32_alwaysinline()
-; OPT: bitcast i32 %val to float
+; GCN: s_swappc_b64
 define amdgpu_kernel void @test_bitcast_return_type_alwaysinline() #0 {
   %val = call float bitcast (i32()* @ret_i32_alwaysinline to float()*)()
   %op = fadd float %val, 1.0
@@ -36,10 +26,6 @@
 ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4
 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12
 ; GCN: s_swappc_b64
-; OPT-LABEL: @test_bitcast_argument_type(
-; OPT: %1 = bitcast float 2.000000e+00 to i32
-; OPT: %val = call i32 @ident_i32(i32 %1)
-; OPT-NOT: bitcast i32 %val to float
 define amdgpu_kernel void @test_bitcast_argument_type() #0 {
   %val = call i32 bitcast (i32(i32)* @ident_i32 to i32(float)*)(float 2.0)
   %op = add i32 %val, 1
@@ -52,10 +38,6 @@
 ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4
 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12
 ; GCN: s_swappc_b64
-; OPT-LABEL: @test_bitcast_argument_and_return_types(
-; OPT: %1 = bitcast float 2.000000e+00 to i32
-; OPT: %val = call i32 @ident_i32(i32 %1)
-; OPT: bitcast i32 %val to float
 define amdgpu_kernel void @test_bitcast_argument_and_return_types() #0 {
   %val = call float bitcast (i32(i32)* @ident_i32 to float(float)*)(float 2.0)
   %op = fadd float %val, 1.0
@@ -82,9 +64,6 @@
 ; GCN: v_mov_b32_e32 v0, 9
 ; GCN: s_swappc_b64
 ; GCN: v_add_f32_e32
-; OPT-LABEL: @use_workitem_id_x(
-; OPT: %val = call i32 @use_workitem_id_x(i32 9)
-; OPT: bitcast i32 %val to float
 define amdgpu_kernel void @test_bitcast_use_workitem_id_x() #0 {
   %val = call float bitcast (i32(i32)* @use_workitem_id_x to float(i32)*)(i32 9)
   %op = fadd float %val, 1.0
@@ -97,12 +76,6 @@
 ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4
 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12
 ; GCN: s_swappc_b64
-; OPT-LABEL: @test_invoke(
-; OPT: %1 = bitcast float 2.000000e+00 to i32
-; OPT: %val = invoke i32 @ident_i32(i32 %1)
-; OPT-NEXT: to label %continue.split unwind label %broken
-; OPT-LABEL: continue.split:
-; OPT: bitcast i32 %val to float
 @_ZTIi = external global i8*
 declare i32 @__gxx_personality_v0(...)
 define amdgpu_kernel void @test_invoke() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {