Index: llvm/lib/Target/AMDGPU/AMDGPU.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPU.h +++ llvm/lib/Target/AMDGPU/AMDGPU.h @@ -47,7 +47,7 @@ FunctionPass *createSIFormMemoryClausesPass(); FunctionPass *createSIPostRABundlerPass(); -FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *); +FunctionPass *createAMDGPUSimplifyLibCallsPass(); FunctionPass *createAMDGPUUseNativeCallsPass(); ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *); FunctionPass *createAMDGPUCodeGenPreparePass(); @@ -59,11 +59,8 @@ FunctionPass *createGCNPreRAOptimizationsPass(); struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> { - AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {} + AMDGPUSimplifyLibCallsPass() {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); - -private: - TargetMachine &TM; }; struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> { Index: llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -20,7 +20,6 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/InitializePasses.h" -#include "llvm/Target/TargetMachine.h" #include <cmath> #define DEBUG_TYPE "amdgpu-simplifylib" @@ -49,8 +48,6 @@ typedef llvm::AMDGPULibFunc FuncInfo; - const TargetMachine *TM; - bool UnsafeFPMath = false; // -fuse-native. 
@@ -98,9 +95,6 @@ bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); - // llvm.amdgcn.wavefrontsize - bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B); - // Get a scalar native builtin single argument FP function FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo); @@ -119,7 +113,7 @@ } public: - AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {} + AMDGPULibCalls() {} bool fold(CallInst *CI, AliasAnalysis *AA = nullptr); @@ -141,8 +135,7 @@ public: static char ID; // Pass identification - AMDGPUSimplifyLibCalls(const TargetMachine *TM = nullptr) - : FunctionPass(ID), Simplifier(TM) { + AMDGPUSimplifyLibCalls() : FunctionPass(ID) { initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry()); } @@ -596,18 +589,8 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) { Function *Callee = CI->getCalledFunction(); // Ignore indirect calls. - if (!Callee || CI->isNoBuiltin()) - return false; - - IRBuilder<> B(CI); - switch (Callee->getIntrinsicID()) { - case Intrinsic::not_intrinsic: - break; - case Intrinsic::amdgcn_wavefrontsize: - return !EnablePreLink && fold_wavefrontsize(CI, B); - default: + if (!Callee || Callee->isIntrinsic() || CI->isNoBuiltin()) return false; - } FuncInfo FInfo; if (!parseFunctionName(Callee->getName(), FInfo)) @@ -623,6 +606,8 @@ if (TDOFold(CI, FInfo)) return true; + IRBuilder<> B(CI); + if (FPMathOperator *FPOp = dyn_cast<FPMathOperator>(CI)) { // Under unsafe-math, evaluate calls if possible. 
// According to Brian Sumner, we can do this for all f32 function calls @@ -1266,28 +1251,6 @@ return true; } -bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) { - if (!TM) - return false; - - StringRef CPU = TM->getTargetCPU(); - StringRef Features = TM->getTargetFeatureString(); - if ((CPU.empty() || CPU.equals_insensitive("generic")) && - (Features.empty() || !Features.contains_insensitive("wavefrontsize"))) - return false; - - Function *F = CI->getParent()->getParent(); - const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F); - unsigned N = ST.getWavefrontSize(); - - LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with " - << N << "\n"); - - CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N)); - CI->eraseFromParent(); - return true; -} - bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo, double& Res0, double& Res1, Constant *copr0, Constant *copr1, @@ -1575,8 +1538,8 @@ } // Public interface to the Simplify LibCalls pass. -FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetMachine *TM) { - return new AMDGPUSimplifyLibCalls(TM); +FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass() { + return new AMDGPUSimplifyLibCalls(); } FunctionPass *llvm::createAMDGPUUseNativeCallsPass() { @@ -1611,7 +1574,7 @@ PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F, FunctionAnalysisManager &AM) { - AMDGPULibCalls Simplifier(&TM); + AMDGPULibCalls Simplifier; Simplifier.initNativeFuncs(); Simplifier.initFunction(F); Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -630,7 +630,7 @@ [this](StringRef PassName, FunctionPassManager &PM, ArrayRef<PassBuilder::PipelineElement>) { if (PassName == "amdgpu-simplifylib") { - PM.addPass(AMDGPUSimplifyLibCallsPass(*this)); + PM.addPass(AMDGPUSimplifyLibCallsPass()); return true; } if (PassName == "amdgpu-usenative") { 
@@ -682,11 +682,11 @@ }); PB.registerPipelineStartEPCallback( - [this](ModulePassManager &PM, OptimizationLevel Level) { + [](ModulePassManager &PM, OptimizationLevel Level) { FunctionPassManager FPM; FPM.addPass(AMDGPUUseNativeCallsPass()); if (EnableLibCallSimplify && Level != OptimizationLevel::O0) - FPM.addPass(AMDGPUSimplifyLibCallsPass(*this)); + FPM.addPass(AMDGPUSimplifyLibCallsPass()); PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); }); Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll @@ -4,16 +4,16 @@ ; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,W32 %s ; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s -; RUN: opt -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-WXX %s -; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-WXX %s -; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s -; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s -; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s -; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s -; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s -; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s -; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 
-mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s -; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s +; RUN: opt -O3 -S < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s ; GCN-LABEL: {{^}}fold_wavefrontsize: ; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize( @@ -22,10 +22,8 @@ ; W64: v_mov_b32_e32 [[V:v[0-9]+]], 64 ; GCN: store_{{dword|b32}} v{{.+}}, [[V]] -; OPT-W32: store i32 32, ptr addrspace(1) %arg, align 4 -; OPT-W64: store i32 64, ptr addrspace(1) %arg, align 4 -; OPT-WXX: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() -; OPT-WXX: store i32 %tmp, ptr addrspace(1) %arg, align 4 +; OPT: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() +; OPT: store i32 %tmp, ptr addrspace(1) %arg, align 4 ; OPT-NEXT: ret void define amdgpu_kernel void @fold_wavefrontsize(ptr addrspace(1) nocapture %arg) { @@ -43,12 +41,10 @@ ; GCN-NOT: cndmask ; GCN: store_{{dword|b32}} 
v{{.+}}, [[V]] -; OPT-W32: store i32 1, ptr addrspace(1) %arg, align 4 -; OPT-W64: store i32 2, ptr addrspace(1) %arg, align 4 -; OPT-WXX: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() -; OPT-WXX: %tmp1 = icmp ugt i32 %tmp, 32 -; OPT-WXX: %tmp2 = select i1 %tmp1, i32 2, i32 1 -; OPT-WXX: store i32 %tmp2, ptr addrspace(1) %arg +; OPT: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() +; OPT: %tmp1 = icmp ugt i32 %tmp, 32 +; OPT: %tmp2 = select i1 %tmp1, i32 2, i32 1 +; OPT: store i32 %tmp2, ptr addrspace(1) %arg ; OPT-NEXT: ret void define amdgpu_kernel void @fold_and_optimize_wavefrontsize(ptr addrspace(1) nocapture %arg) { @@ -64,10 +60,9 @@ ; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize( ; OPT: bb: -; OPT-WXX: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() -; OPT-WXX: %tmp1 = icmp ugt i32 %tmp, 32 -; OPT-WXX: bb3: -; OPT-W64: store i32 1, ptr addrspace(1) %arg, align 4 +; OPT: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() +; OPT: %tmp1 = icmp ugt i32 %tmp, 32 +; OPT: bb3: ; OPT-NEXT: ret void define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(ptr addrspace(1) nocapture %arg) {