Index: llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -19,12 +19,14 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/InitializePasses.h" #include #define DEBUG_TYPE "amdgpu-simplifylib" using namespace llvm; +using namespace llvm::PatternMatch; static cl::opt EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), @@ -782,32 +784,19 @@ "fold_pow: encounter a wrong function call"); Module *M = B.GetInsertBlock()->getModule(); - ConstantFP *CF; - ConstantInt *CINT; - Type *eltType; + Type *eltType = FPOp->getType()->getScalarType(); Value *opr0 = FPOp->getOperand(0); Value *opr1 = FPOp->getOperand(1); - ConstantAggregateZero *CZero = dyn_cast(opr1); - if (getVecSize(FInfo) == 1) { - eltType = opr0->getType(); - CF = dyn_cast(opr1); - CINT = dyn_cast(opr1); - } else { - VectorType *VTy = dyn_cast(opr0->getType()); - assert(VTy && "Oprand of vector function should be of vectortype"); - eltType = VTy->getElementType(); - ConstantDataVector *CDV = dyn_cast(opr1); - - // Now, only Handle vector const whose elements have the same value. - CF = CDV ? dyn_cast_or_null(CDV->getSplatValue()) : nullptr; - CINT = CDV ? dyn_cast_or_null(CDV->getSplatValue()) : nullptr; - } + const APFloat *CF = nullptr; + const APInt *CINT = nullptr; + if (!match(opr1, m_APFloatAllowUndef(CF))) + match(opr1, m_APIntAllowUndef(CINT)); // 0x1111111 means that we don't do anything for this call. int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111); - if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) { + if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0)) { // pow/powr/pown(x, 0) == 1 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1\n"); Constant *cnval = ConstantFP::get(eltType, 1.0); @@ -867,8 +856,8 @@ // Remember that ci_opr1 is set if opr1 is integral if (CF) { double dval = (getArgType(FInfo) == AMDGPULibFunc::F32) - ? (double)CF->getValueAPF().convertToFloat() - : CF->getValueAPF().convertToDouble(); + ? (double)CF->convertToFloat() + : CF->convertToDouble(); int ival = (int)dval; if ((double)ival == dval) { ci_opr1 = ival; @@ -926,12 +915,13 @@ bool needcopysign = false; Constant *cnval = nullptr; if (getVecSize(FInfo) == 1) { - CF = dyn_cast(opr0); + CF = nullptr; + match(opr0, m_APFloatAllowUndef(CF)); if (CF) { double V = (getArgType(FInfo) == AMDGPULibFunc::F32) - ? (double)CF->getValueAPF().convertToFloat() - : CF->getValueAPF().convertToDouble(); + ? (double)CF->convertToFloat() + : CF->convertToDouble(); V = log2(std::abs(V)); cnval = ConstantFP::get(eltType, V); Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll @@ -562,8 +562,7 @@ define <3 x float> @test_pow_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_0.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> [[X]], <3 x float> ) -; CHECK-NEXT: ret <3 x float> [[POW]] +; CHECK-NEXT: ret <3 x float> ; %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %pow @@ -572,8 +571,7 @@ define <3 x float> @test_pow_afn_v3f32_neg0.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_neg0.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> [[X]], <3 x float> ) -; CHECK-NEXT: ret <3 x float> [[POW]] +; CHECK-NEXT: ret <3 x float> ; %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %pow @@ -632,8 +630,8 @@ define <3 x float> @test_pow_afn_v3f32_0.5_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_0.5_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> [[X]], <3 x float> ) -; CHECK-NEXT: ret <3 x float> [[POW]] +; CHECK-NEXT: [[__POW2SQRT:%.*]] = call afn <3 x float> @_Z4sqrtDv3_f(<3 x float> [[X]]) +; CHECK-NEXT: ret <3 x float> [[__POW2SQRT]] ; %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %pow @@ -642,8 +640,8 @@ define <3 x float> @test_pow_afn_v3f32_neg0.5_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_neg0.5_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> [[X]], <3 x float> ) -; CHECK-NEXT: ret <3 x float> [[POW]] +; CHECK-NEXT: [[__POW2RSQRT:%.*]] = call afn <3 x float> @_Z5rsqrtDv3_f(<3 x float> [[X]]) +; CHECK-NEXT: ret <3 x float> [[__POW2RSQRT]] ; %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %pow @@ -700,8 +698,7 @@ define <3 x float> @test_pow_afn_v3f32_1.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_1.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> [[X]], <3 x float> ) -; CHECK-NEXT: ret <3 x float> [[POW]] +; CHECK-NEXT: ret <3 x float> [[X]] ; %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %pow @@ -710,8 +707,8 @@ define <3 x float> @test_pow_afn_v3f32_neg1.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_neg1.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> [[X]], <3 x float> ) -; CHECK-NEXT: ret <3 x float> [[POW]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <3 x float> , [[X]] +; CHECK-NEXT: ret <3 x float> [[__POWRECIP]] ; %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %pow Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll @@ -535,8 +535,7 @@ define <3 x float> @test_powr_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_0.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> [[X]], <3 x float> ) -; CHECK-NEXT: ret <3 x float> [[POWR]] +; CHECK-NEXT: ret <3 x float> ; %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %powr @@ -545,8 +544,7 @@ define <3 x float> @test_powr_afn_v3f32_neg0.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_neg0.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> [[X]], <3 x float> ) -; CHECK-NEXT: ret <3 x float> [[POWR]] +; CHECK-NEXT: ret <3 x float> ; %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %powr @@ -605,8 +603,8 @@ define <3 x float> @test_powr_afn_v3f32_0.5_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_0.5_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> [[X]], <3 x float> ) -; CHECK-NEXT: ret <3 x float> [[POWR]] +; CHECK-NEXT: [[__POW2SQRT:%.*]] = call afn <3 x float> @_Z4sqrtDv3_f(<3 x float> [[X]]) +; CHECK-NEXT: ret <3 x float> [[__POW2SQRT]] ; %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %powr @@ -615,8 +613,8 @@ define <3 x float> @test_powr_afn_v3f32_neg0.5_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_neg0.5_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> [[X]], <3 x float> ) -; CHECK-NEXT: ret <3 x float> [[POWR]] +; CHECK-NEXT: [[__POW2RSQRT:%.*]] = call afn <3 x float> @_Z5rsqrtDv3_f(<3 x float> [[X]]) +; CHECK-NEXT: ret <3 x float> [[__POW2RSQRT]] ; %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %powr @@ -673,8 +671,7 @@ define <3 x float> @test_powr_afn_v3f32_1.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_1.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> [[X]], <3 x float> ) -; CHECK-NEXT: ret <3 x float> [[POWR]] +; CHECK-NEXT: ret <3 x float> [[X]] ; %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %powr @@ -683,8 +680,8 @@ define <3 x float> @test_powr_afn_v3f32_neg1.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_neg1.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> [[X]], <3 x float> ) -; CHECK-NEXT: ret <3 x float> [[POWR]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <3 x float> , [[X]] +; CHECK-NEXT: ret <3 x float> [[__POWRECIP]] ; %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %powr