[InstCombine] Fold division by a single-use pow() call into a multiply.

  Z / pow(X, Y) --> Z * pow(X, -Y)

The divisor must be a one-use llvm.pow call so no extra pow is created.
Per the negative tests below, the fdiv must carry both reassoc and arcp.
When Z is 1.0 (reciprocal), only the negated-exponent pow remains.

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1325,6 +1325,17 @@
     // replaced by a multiplication.
     if (match(Op1, m_FDiv(m_SpecificFP(1.0), m_Value(Y))))
       return BinaryOperator::CreateFMulFMF(Y, Op0, &I);
+
+    // Negate the exponent of pow to fold division-by-pow() into multiply:
+    //   Z / pow(X, Y) --> Z * pow(X, -Y)
+    // In the general case, this creates an extra instruction, but fmul allows
+    // for better canonicalization and optimization than fdiv.
+    if (match(Op1,
+              m_OneUse(m_Intrinsic<Intrinsic::pow>(m_Value(X), m_Value(Y))))) {
+      Value *NegY = Builder.CreateFNegFMF(Y, &I);
+      Value *Pow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, X, NegY, &I);
+      return BinaryOperator::CreateFMulFMF(Op0, Pow, &I);
+    }
   }
 
   if (I.hasAllowReassoc() && Op0->hasOneUse() && Op1->hasOneUse()) {
diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll
--- a/llvm/test/Transforms/InstCombine/fdiv.ll
+++ b/llvm/test/Transforms/InstCombine/fdiv.ll
@@ -661,8 +661,9 @@
 
 define float @pow_divisor(float %x, float %y, float %z) {
 ; CHECK-LABEL: @pow_divisor(
-; CHECK-NEXT:    [[P:%.*]] = call float @llvm.pow.f32(float [[X:%.*]], float [[Y:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fdiv reassoc arcp float [[Z:%.*]], [[P]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fneg reassoc arcp float [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call reassoc arcp float @llvm.pow.f32(float [[X:%.*]], float [[TMP1]])
+; CHECK-NEXT:    [[R:%.*]] = fmul reassoc arcp float [[TMP2]], [[Z:%.*]]
 ; CHECK-NEXT:    ret float [[R]]
 ;
   %p = call float @llvm.pow.f32(float %x, float %y)
@@ -670,6 +671,8 @@
   ret float %r
 }
 
+; Negative test - don't create an extra pow
+
 define float @pow_divisor_extra_use(float %x, float %y, float %z) {
 ; CHECK-LABEL: @pow_divisor_extra_use(
 ; CHECK-NEXT:    [[P:%.*]] = call float @llvm.pow.f32(float [[X:%.*]], float [[Y:%.*]])
@@ -683,6 +686,8 @@
   ret float %r
 }
 
+; Negative test - must have reassoc+arcp
+
 define float @pow_divisor_not_enough_fmf(float %x, float %y, float %z) {
 ; CHECK-LABEL: @pow_divisor_not_enough_fmf(
 ; CHECK-NEXT:    [[P:%.*]] = call fast float @llvm.pow.f32(float [[X:%.*]], float [[Y:%.*]])
@@ -694,6 +699,8 @@
   ret float %r
 }
 
+; Negative test - must have reassoc+arcp
+
 define float @pow_divisor_not_enough_fmf2(float %x, float %y, float %z) {
 ; CHECK-LABEL: @pow_divisor_not_enough_fmf2(
 ; CHECK-NEXT:    [[P:%.*]] = call fast float @llvm.pow.f32(float [[X:%.*]], float [[Y:%.*]])
@@ -705,11 +712,13 @@
   ret float %r
 }
 
+; Special-case - reciprocal does not require extra fmul
+
 define <2 x half> @pow_recip(<2 x half> %x, <2 x half> %y) {
 ; CHECK-LABEL: @pow_recip(
-; CHECK-NEXT:    [[P:%.*]] = call <2 x half> @llvm.pow.v2f16(<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fdiv reassoc ninf arcp <2 x half> <half 0xH3C00, half 0xH3C00>, [[P]]
-; CHECK-NEXT:    ret <2 x half> [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fneg reassoc ninf arcp <2 x half> [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call reassoc ninf arcp <2 x half> @llvm.pow.v2f16(<2 x half> [[X:%.*]], <2 x half> [[TMP1]])
+; CHECK-NEXT:    ret <2 x half> [[TMP2]]
 ;
   %p = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
   %r = fdiv reassoc arcp ninf <2 x half> <half 1.0, half 1.0>, %p