Index: llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -14,7 +14,10 @@
 #include "AMDGPU.h"
 #include "AMDGPULibFunc.h"
 #include "GCNSubtarget.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
@@ -46,6 +49,9 @@
 
 class AMDGPULibCalls {
 private:
+  const TargetLibraryInfo *TLInfo = nullptr;
+  AssumptionCache *AC = nullptr;
+  DominatorTree *DT = nullptr;
 
   typedef llvm::AMDGPULibFunc FuncInfo;
 
@@ -135,7 +141,7 @@
 
   bool fold(CallInst *CI);
 
-  void initFunction(const Function &F);
+  void initFunction(Function &F, FunctionAnalysisManager &FAM);
   void initNativeFuncs();
 
   // Replace a normal math function call with that native version
@@ -420,8 +426,11 @@
   return isUnsafeMath(FPOp);
 }
 
-void AMDGPULibCalls::initFunction(const Function &F) {
+void AMDGPULibCalls::initFunction(Function &F, FunctionAnalysisManager &FAM) {
   UnsafeFPMath = F.getFnAttribute("unsafe-fp-math").getValueAsBool();
+  AC = &FAM.getResult<AssumptionAnalysis>(F);
+  TLInfo = &FAM.getResult<TargetLibraryAnalysis>(F);
+  DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
 }
 
 bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
@@ -676,12 +685,14 @@
   Module *M = Callee->getParent();
   AMDGPULibFunc PowrInfo(AMDGPULibFunc::EI_POWR, FInfo);
   FunctionCallee PowrFunc = getFunction(M, PowrInfo);
+  CallInst *Call = cast<CallInst>(FPOp);
 
   // pow(x, y) -> powr(x, y) for x >= -0.0
-  // TODO: Pass all arguments to cannotBeOrderedLessThanZero
-  if (PowrFunc && cannotBeOrderedLessThanZero(FPOp->getOperand(0),
-                                              M->getDataLayout())) {
-    cast<CallInst>(FPOp)->setCalledFunction(PowrFunc);
+  // TODO: Account for flags on current call
+  if (PowrFunc &&
+      cannotBeOrderedLessThanZero(FPOp->getOperand(0), M->getDataLayout(),
+                                  TLInfo, 0, AC, Call, DT)) {
+    Call->setCalledFunction(PowrFunc);
     return fold_pow(FPOp, B, PowrInfo) || true;
   }
 
@@ -1595,7 +1606,7 @@
                                                   FunctionAnalysisManager &AM) {
   AMDGPULibCalls Simplifier;
   Simplifier.initNativeFuncs();
-  Simplifier.initFunction(F);
+  Simplifier.initFunction(F, AM);
 
   bool Changed = false;
 
@@ -1624,7 +1635,7 @@
 
   AMDGPULibCalls Simplifier;
   Simplifier.initNativeFuncs();
-  Simplifier.initFunction(F);
+  Simplifier.initFunction(F, AM);
 
   bool Changed = false;
   for (auto &BB : F) {
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
@@ -1610,7 +1610,7 @@
 ; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
 ; CHECK-NEXT:    [[X_OGE_ZERO:%.*]] = fcmp oge float [[X]], 0.000000e+00
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[X_OGE_ZERO]])
-; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float [[Y]])
+; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z4powrff(float [[X]], float [[Y]])
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %x.oge.zero = fcmp oge float %x, 0.0
@@ -1624,7 +1624,7 @@
 ; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
 ; CHECK-NEXT:    [[X_OGE_ZERO:%.*]] = fcmp ogt float [[X]], 0.000000e+00
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[X_OGE_ZERO]])
-; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float [[Y]])
+; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z4powrff(float [[X]], float [[Y]])
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %x.oge.zero = fcmp ogt float %x, 0.0
@@ -1638,7 +1638,7 @@
 ; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
 ; CHECK-NEXT:    [[X_UGE_ZERO:%.*]] = fcmp uge float [[X]], 0.000000e+00
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[X_UGE_ZERO]])
-; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float [[Y]])
+; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z4powrff(float [[X]], float [[Y]])
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %x.uge.zero = fcmp uge float %x, 0.0
@@ -1652,7 +1652,7 @@
 ; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
 ; CHECK-NEXT:    [[X_UGT_ZERO:%.*]] = fcmp ugt float [[X]], 0.000000e+00
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[X_UGT_ZERO]])
-; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float [[Y]])
+; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z4powrff(float [[X]], float [[Y]])
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %x.ugt.zero = fcmp ugt float %x, 0.0