diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -475,7 +475,6 @@
     SDValue visitFREM(SDNode *N);
     SDValue visitFSQRT(SDNode *N);
     SDValue visitFCOPYSIGN(SDNode *N);
-    SDValue visitFPOW(SDNode *N);
     SDValue visitSINT_TO_FP(SDNode *N);
     SDValue visitUINT_TO_FP(SDNode *N);
     SDValue visitFP_TO_SINT(SDNode *N);
@@ -1689,7 +1688,6 @@
   case ISD::FREM:               return visitFREM(N);
   case ISD::FSQRT:              return visitFSQRT(N);
   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
-  case ISD::FPOW:               return visitFPOW(N);
   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
@@ -14520,83 +14518,6 @@
   return SDValue();
 }
 
-SDValue DAGCombiner::visitFPOW(SDNode *N) {
-  ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
-  if (!ExponentC)
-    return SDValue();
-  SelectionDAG::FlagInserter FlagsInserter(DAG, N);
-
-  // Try to convert x ** (1/3) into cube root.
-  // TODO: Handle the various flavors of long double.
-  // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
-  //       Some range near 1/3 should be fine.
-  EVT VT = N->getValueType(0);
-  if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
-      (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
-    // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
-    // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
-    // pow(-val, 1/3) =  nan; cbrt(-val) = -num.
-    // For regular numbers, rounding may cause the results to differ.
-    // Therefore, we require { nsz ninf nnan afn } for this transform.
-    // TODO: We could select out the special cases if we don't have nsz/ninf.
-    SDNodeFlags Flags = N->getFlags();
-    if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
-        !Flags.hasApproximateFuncs())
-      return SDValue();
-
-    // Do not create a cbrt() libcall if the target does not have it, and do not
-    // turn a pow that has lowering support into a cbrt() libcall.
-    if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
-        (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
-         DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
-      return SDValue();
-
-    return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
-  }
-
-  // Try to convert x ** (1/4) and x ** (3/4) into square roots.
-  // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
-  // TODO: This could be extended (using a target hook) to handle smaller
-  // power-of-2 fractional exponents.
-  bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
-  bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
-  if (ExponentIs025 || ExponentIs075) {
-    // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
-    // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
-    // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
-    // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
-    // For regular numbers, rounding may cause the results to differ.
-    // Therefore, we require { nsz ninf afn } for this transform.
-    // TODO: We could select out the special cases if we don't have nsz/ninf.
-    SDNodeFlags Flags = N->getFlags();
-
-    // We only need no signed zeros for the 0.25 case.
-    if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
-        !Flags.hasApproximateFuncs())
-      return SDValue();
-
-    // Don't double the number of libcalls.
-    // We are trying to inline fast code.
-    if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
-      return SDValue();
-
-    // Assume that libcalls are the smallest code.
-    // TODO: This restriction should probably be lifted for vectors.
-    if (ForCodeSize)
-      return SDValue();
-
-    // pow(X, 0.25) --> sqrt(sqrt(X))
-    SDLoc DL(N);
-    SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
-    SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
-    if (ExponentIs025)
-      return SqrtSqrt;
-    // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
-    return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
-  }
-
-  return SDValue();
-}
-
 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
                                const TargetLowering &TLI) {
   // This optimization is guarded by a function attribute because it may produce
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1555,6 +1555,21 @@
   return nullptr;
 }
 
+static Value *getCbrtCall(Value *V, AttributeList Attrs, Module *M,
+                          IRBuilderBase &B, const TargetLibraryInfo *TLI) {
+  // Use the cbrt() libcall when it is available.
+  if (hasFloatFn(TLI, V->getType(), LibFunc_cbrt, LibFunc_cbrtf, LibFunc_cbrtl))
+    // TODO: We should also check that the target can actually lower the cbrt()
+    // libcall. We currently have no way to ask that question, so we settle for
+    // asking whether the C library provides cbrt(), which is not quite the
+    // same thing.
+    return emitUnaryFloatFnCall(V, TLI, LibFunc_cbrt, LibFunc_cbrtf,
+                                LibFunc_cbrtl, B, Attrs);
+
+  return nullptr;
+}
+
 /// Use square root in place of pow(x, +/-0.5).
 Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) {
   Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
@@ -1662,31 +1677,76 @@
   if (AllowApprox && match(Expo, m_APFloat(ExpoF)) &&
       !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) {
     // We limit to a max of 7 multiplications, thus the maximum exponent is 32.
-    // If the exponent is an integer+0.5 we generate a call to sqrt and an
-    // additional fmul.
+    // If the exponent is an integer+fraction we generate a call to sqrt or
+    // cbrt and an additional fmul.
    // TODO: This whole transformation should be backend specific (e.g. some
    //       backends might prefer libcalls or the limit for the exponent might
    //       be different) and it should also consider optimizing for size.
     APFloat LimF(ExpoF->getSemantics(), 33),
             ExpoA(abs(*ExpoF));
     if (ExpoA < LimF) {
-      // This transformation applies to integer or integer+0.5 exponents only.
-      // For integer+0.5, we create a sqrt(Base) call.
-      Value *Sqrt = nullptr;
+      // This transformation applies to integer or integer+fraction exponents:
+      //   pow(Base, n+0.5)  --> pow(Base, n) * sqrt(Base)
+      //   pow(Base, n+0.25) --> pow(Base, n) * sqrt(sqrt(Base))
+      //   pow(Base, n+0.75) --> pow(Base, n) * sqrt(Base) * sqrt(sqrt(Base))
+      //   pow(Base, n+1/3)  --> pow(Base, n) * cbrt(Base)
+      //   pow(Base, n+2/3)  --> pow(Base, n) * cbrt(Base) * cbrt(Base)
+      Value *Sqrt = nullptr, *Cbrt = nullptr, *Pow23 = nullptr;
+      Value *SqrtSqrt = nullptr, *Pow075 = nullptr;
       if (!ExpoA.isInteger()) {
-        APFloat Expo2 = ExpoA;
-        // To check if ExpoA is an integer + 0.5, we add it to itself. If there
+        // Extract the fractional part of ExpoA into ExpoD.
+        APFloat ExpoD = ExpoA;
+        ExpoD.roundToIntegral(APFloat::rmTowardZero);
+        if (ExpoD.subtract(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK)
+          return nullptr;
+        ExpoD.clearSign();
+
+        // To check if ExpoD is 0.5, we add it to itself. If there
         // is no floating point exception and the result is an integer, then
-        // ExpoA == integer + 0.5
-        if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK)
+        // ExpoD == 0.5.
+        APFloat Expo2 = ExpoD;
+        if (Expo2.add(ExpoD, APFloat::rmNearestTiesToEven) != APFloat::opOK)
           return nullptr;
-        if (!Expo2.isInteger())
+
+        // To check if ExpoD is 1/3 or 2/3, we add it to itself two more
+        // times. The result may be inexact (e.g. 3 * 1/3 rounds back to 1.0),
+        // so the status is deliberately not checked here.
+        APFloat Expo3 = Expo2;
+        Expo3.add(ExpoD, APFloat::rmNearestTiesToEven);
+
+        // To check if ExpoD is 0.25 or 0.75, we add it to itself four times.
+        APFloat Expo4 = Expo2;
+        if (Expo4.add(Expo2, APFloat::rmNearestTiesToEven) != APFloat::opOK)
           return nullptr;
-        Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
-                           Pow->doesNotAccessMemory(), M, B, TLI);
-        if (!Sqrt)
+
+        if (Expo2.isInteger()) {
+          // ExpoD is 0.5.
+          Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
+                             Pow->doesNotAccessMemory(), M, B, TLI);
+          if (!Sqrt)
+            return nullptr;
+        } else if (Expo3.isInteger()) {
+          // ExpoD is 1/3 or 2/3.
+          Cbrt = getCbrtCall(Base, Pow->getCalledFunction()->getAttributes(),
+                             M, B, TLI);
+          if (!Cbrt)
+            return nullptr;
+
+          // To tell 2/3 apart from 1/3, check whether Expo3 (= 3 * ExpoD) is
+          // divisible by 2.
+          if (Expo3.divide(APFloat(Expo3.getSemantics(), 2),
+                           APFloat::rmNearestTiesToEven) == APFloat::opOK &&
+              Expo3.isInteger())
+            Pow23 = B.CreateFMul(Cbrt, Cbrt, "pow23");
+        } else if (Expo4.isInteger()) {
+          // ExpoD is 0.25 or 0.75.
+          Value *SqrtTmp =
+              getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
+                          Pow->doesNotAccessMemory(), M, B, TLI);
+          if (!SqrtTmp)
+            return nullptr;
+          SqrtSqrt =
+              getSqrtCall(SqrtTmp, Pow->getCalledFunction()->getAttributes(),
+                          Pow->doesNotAccessMemory(), M, B, TLI);
+          if (!SqrtSqrt)
+            return nullptr;
+
+          // To tell 0.75 apart from 0.25, check whether Expo4 (= 4 * ExpoD)
+          // is divisible by 3.
+          if (Expo4.divide(APFloat(Expo4.getSemantics(), 3),
+                           APFloat::rmNearestTiesToEven) == APFloat::opOK &&
+              Expo4.isInteger())
+            Pow075 = B.CreateFMul(SqrtTmp, SqrtSqrt, "pow075");
+        } else
           return nullptr;
       }
@@ -1698,12 +1758,32 @@
       // We cannot readily convert a non-double type (like float) to a double.
       // So we first convert it to something which could be converted to double.
       ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
-      Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B);
+      Value *FMul = ConstantFP::get(Ty, 1.0);
+      if ((int)ExpoA.convertToDouble() != 0)
+        FMul = getPow(InnerChain, ExpoA.convertToDouble(), B);
 
       // Expand pow(x, y+0.5) to pow(x, y) * sqrt(x).
       if (Sqrt)
         FMul = B.CreateFMul(FMul, Sqrt);
 
+      // Expand pow(x, y+1/3) to pow(x, y) * cbrt(x).
+      // Expand pow(x, y+2/3) to pow(x, y) * cbrt(x) * cbrt(x).
+      if (Cbrt) {
+        if (Pow23)
+          FMul = B.CreateFMul(FMul, Pow23);
+        else
+          FMul = B.CreateFMul(FMul, Cbrt);
+      }
+
+      // Expand pow(x, y+0.25) to pow(x, y) * sqrt(sqrt(x)).
+      // Expand pow(x, y+0.75) to pow(x, y) * sqrt(x) * sqrt(sqrt(x)).
+      if (SqrtSqrt) {
+        if (Pow075)
+          FMul = B.CreateFMul(FMul, Pow075);
+        else
+          FMul = B.CreateFMul(FMul, SqrtSqrt);
+      }
+
       // If the exponent is negative, then get the reciprocal.
if (ExpoF->isNegative()) FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal"); diff --git a/llvm/test/CodeGen/AArch64/pow.75.ll b/llvm/test/CodeGen/AArch64/pow.75.ll --- a/llvm/test/CodeGen/AArch64/pow.75.ll +++ b/llvm/test/CodeGen/AArch64/pow.75.ll @@ -1,5 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-- -debug 2>&1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -O2 -S -mtriple=aarch64-- %s 2>&1 | FileCheck %s ; REQUIRES: asserts declare float @llvm.pow.f32(float, float) @@ -8,41 +8,45 @@ declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) define float @pow_f32_three_fourth_fmf(float %x) nounwind { -; CHECK: Combining: {{.*}}: f32 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01> -; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[X]] -; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[SQRT]] -; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] -; CHECK-NEXT: ... into: [[R]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] +; CHECK-LABEL: @pow_f32_three_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[SQRT]]) +; CHECK-NEXT: [[POW075:%.*]] = fmul ninf nsz afn float [[SQRT]], [[SQRT1]] +; CHECK-NEXT: ret float [[POW075]] +; %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 7.5e-01) ret float %r } define double @pow_f64_three_fourth_fmf(double %x) nounwind { -; CHECK: Combining: {{.*}}: f64 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f64<7.500000e-01> -; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[X]] -; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[SQRT]] -; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] -; CHECK-NEXT: ... into: [[R]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] +; CHECK-LABEL: @pow_f64_three_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[SQRT]]) +; CHECK-NEXT: [[POW075:%.*]] = fmul ninf nsz afn double [[SQRT]], [[SQRT1]] +; CHECK-NEXT: ret double [[POW075]] +; %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 7.5e-01) ret double %r } define <4 x float> @pow_v4f32_three_fourth_fmf(<4 x float> %x) nounwind { -; CHECK: Combining: {{.*}}: v4f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], {{.*}} -; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: v4f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] -; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: v4f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] -; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: v4f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] -; CHECK-NEXT: ... 
into: [[R]]: v4f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] +; CHECK-LABEL: @pow_v4f32_three_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[SQRT]]) +; CHECK-NEXT: [[POW075:%.*]] = fmul fast <4 x float> [[SQRT1]], [[SQRT]] +; CHECK-NEXT: ret <4 x float> [[POW075]] +; %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> ) ret <4 x float> %r } define <2 x double> @pow_v2f64_three_fourth_fmf(<2 x double> %x) nounwind { -; CHECK: Combining: {{.*}}: v2f64 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], {{.*}} -; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: v2f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] -; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: v2f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] -; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: v2f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] -; CHECK-NEXT: ... into: [[R]]: v2f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] +; CHECK-LABEL: @pow_v2f64_three_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[SQRT]]) +; CHECK-NEXT: [[POW075:%.*]] = fmul fast <2 x double> [[SQRT1]], [[SQRT]] +; CHECK-NEXT: ret <2 x double> [[POW075]] +; %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %r } diff --git a/llvm/test/CodeGen/AArch64/pow.ll b/llvm/test/CodeGen/AArch64/pow.ll --- a/llvm/test/CodeGen/AArch64/pow.ll +++ b/llvm/test/CodeGen/AArch64/pow.ll @@ -1,5 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -O2 -S -mtriple=aarch64-- %s | FileCheck %s declare float @llvm.pow.f32(float, float) declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) @@ -8,125 +8,82 @@ declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) define float @pow_f32_one_fourth_fmf(float %x) nounwind { -; CHECK-LABEL: pow_f32_one_fourth_fmf: -; CHECK: // %bb.0: -; CHECK-NEXT: fsqrt s0, s0 -; CHECK-NEXT: fsqrt s0, s0 -; CHECK-NEXT: ret +; CHECK-LABEL: @pow_f32_one_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[SQRT]]) +; CHECK-NEXT: ret float [[SQRT1]] +; %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 2.5e-01) ret float %r } define double @pow_f64_one_fourth_fmf(double %x) nounwind { -; CHECK-LABEL: pow_f64_one_fourth_fmf: -; CHECK: // %bb.0: -; CHECK-NEXT: fsqrt d0, d0 -; CHECK-NEXT: fsqrt d0, d0 -; CHECK-NEXT: ret +; CHECK-LABEL: @pow_f64_one_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[SQRT]]) +; CHECK-NEXT: ret double [[SQRT1]] +; %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 2.5e-01) ret double %r } define <4 x float> @pow_v4f32_one_fourth_fmf(<4 x float> %x) nounwind { -; CHECK-LABEL: pow_v4f32_one_fourth_fmf: -; CHECK: // %bb.0: -; CHECK-NEXT: fsqrt v0.4s, v0.4s -; CHECK-NEXT: fsqrt v0.4s, 
v0.4s -; CHECK-NEXT: ret +; CHECK-LABEL: @pow_v4f32_one_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[SQRT]]) +; CHECK-NEXT: ret <4 x float> [[SQRT1]] +; %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> ) ret <4 x float> %r } define <2 x double> @pow_v2f64_one_fourth_fmf(<2 x double> %x) nounwind { -; CHECK-LABEL: pow_v2f64_one_fourth_fmf: -; CHECK: // %bb.0: -; CHECK-NEXT: fsqrt v0.2d, v0.2d -; CHECK-NEXT: fsqrt v0.2d, v0.2d -; CHECK-NEXT: ret +; CHECK-LABEL: @pow_v2f64_one_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[SQRT]]) +; CHECK-NEXT: ret <2 x double> [[SQRT1]] +; %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %r } define float @pow_f32_one_fourth_not_enough_fmf(float %x) nounwind { -; CHECK-LABEL: pow_f32_one_fourth_not_enough_fmf: -; CHECK: // %bb.0: -; CHECK-NEXT: fmov s1, #0.25000000 -; CHECK-NEXT: b powf +; CHECK-LABEL: @pow_f32_one_fourth_not_enough_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call ninf afn float @llvm.sqrt.f32(float [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call ninf afn float @llvm.sqrt.f32(float [[SQRT]]) +; CHECK-NEXT: ret float [[SQRT1]] +; %r = call afn ninf float @llvm.pow.f32(float %x, float 2.5e-01) ret float %r } define double @pow_f64_one_fourth_not_enough_fmf(double %x) nounwind { -; CHECK-LABEL: pow_f64_one_fourth_not_enough_fmf: -; CHECK: // %bb.0: -; CHECK-NEXT: fmov d1, #0.25000000 -; CHECK-NEXT: b pow - %r = call nsz ninf double @llvm.pow.f64(double %x, double 2.5e-01) +; CHECK-LABEL: @pow_f64_one_fourth_not_enough_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[SQRT]]) +; CHECK-NEXT: ret double [[SQRT1]] +; + %r = call afn nsz ninf double @llvm.pow.f64(double %x, double 2.5e-01) ret double %r } define <4 x float> @pow_v4f32_one_fourth_not_enough_fmf(<4 x float> %x) nounwind { -; CHECK-LABEL: pow_v4f32_one_fourth_not_enough_fmf: -; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: fmov s1, #0.25000000 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: bl powf -; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: fmov s1, #0.25000000 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: bl powf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: fmov s1, #0.25000000 -; CHECK-NEXT: mov s0, v0.s[2] -; CHECK-NEXT: bl powf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: mov v1.s[2], v0.s[0] -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill -; CHECK-NEXT: fmov s1, #0.25000000 -; CHECK-NEXT: mov s0, v0.s[3] -; CHECK-NEXT: bl powf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, 
[sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: mov v1.s[3], v0.s[0] -; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-LABEL: @pow_v4f32_one_fourth_not_enough_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call nsz afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call nsz afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[SQRT]]) +; CHECK-NEXT: ret <4 x float> [[SQRT1]] +; %r = call afn nsz <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> ) ret <4 x float> %r } define <2 x double> @pow_v2f64_one_fourth_not_enough_fmf(<2 x double> %x) nounwind { -; CHECK-LABEL: pow_v2f64_one_fourth_not_enough_fmf: -; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov d0, v0.d[1] -; CHECK-NEXT: fmov d1, #0.25000000 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: bl pow -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: fmov d1, #0.25000000 -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: bl pow -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret - %r = call nsz nnan reassoc <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) +; CHECK-LABEL: @pow_v2f64_one_fourth_not_enough_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call reassoc nnan nsz afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call reassoc nnan nsz afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[SQRT]]) +; CHECK-NEXT: ret <2 x double> [[SQRT1]] +; + %r = call afn nsz nnan reassoc <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %r } diff --git a/llvm/test/CodeGen/AArch64/pow.third.ll b/llvm/test/CodeGen/AArch64/pow.third.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/pow.third.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -O2 -S -mtriple=aarch64-- %s 2>&1 | FileCheck %s +; REQUIRES: asserts + +declare float @llvm.pow.f32(float, float) +declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) +declare double @llvm.pow.f64(double, double) +declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) + +define float @pow_f32_one_third_fmf(float %x) nounwind { +; CHECK-LABEL: @pow_f32_one_third_fmf( +; CHECK-NEXT: [[CBRTF:%.*]] = tail call ninf nsz afn float @cbrtf(float [[X:%.*]]) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: ret float [[CBRTF]] +; + %a = fptrunc double 0x3FD5555555555555 to float + %r = call nsz ninf afn float @llvm.pow.f32(float %x, float %a) + ret float %r +} + +define double @pow_f64_one_third_fmf(double %x) nounwind { +; CHECK-LABEL: @pow_f64_one_third_fmf( +; CHECK-NEXT: [[CBRT:%.*]] = tail call ninf nsz afn double @cbrt(double [[X:%.*]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[CBRT]] +; + %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 0x3FD5555555555555) + ret double %r +} + +define <4 x float> @pow_v4f32_one_third_fmf(<4 x float> %x) nounwind { +; CHECK-LABEL: @pow_v4f32_one_third_fmf( +; CHECK-NEXT: [[CBRTL:%.*]] = tail call fast <4 x float> @cbrtl(<4 x float> [[X:%.*]]) #[[ATTR0]] +; CHECK-NEXT: ret <4 x float> [[CBRTL]] +; + %a = fptrunc <4 x 
double> to <4 x float> + %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> %a) + ret <4 x float> %r +} + +define <2 x double> @pow_v2f64_one_third_fmf(<2 x double> %x) nounwind { +; CHECK-LABEL: @pow_v2f64_one_third_fmf( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[X:%.*]] to <4 x float> +; CHECK-NEXT: [[CBRTL:%.*]] = tail call <4 x float> @cbrtl(<4 x float> [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[CBRTL]] to <2 x double> +; CHECK-NEXT: ret <2 x double> [[TMP2]] +; + %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) + ret <2 x double> %r +} + +define float @pow_f32_two_third_fmf(float %x) nounwind { +; CHECK-LABEL: @pow_f32_two_third_fmf( +; CHECK-NEXT: [[CBRTF:%.*]] = tail call ninf nsz afn float @cbrtf(float [[X:%.*]]) #[[ATTR0]] +; CHECK-NEXT: [[POW23:%.*]] = fmul ninf nsz afn float [[CBRTF]], [[CBRTF]] +; CHECK-NEXT: ret float [[POW23]] +; + %a = fptrunc double 0x3FE5555555555555 to float + %r = call nsz ninf afn float @llvm.pow.f32(float %x, float %a) + ret float %r +} + +define double @pow_f64_two_third_fmf(double %x) nounwind { +; CHECK-LABEL: @pow_f64_two_third_fmf( +; CHECK-NEXT: [[CBRT:%.*]] = tail call ninf nsz afn double @cbrt(double [[X:%.*]]) #[[ATTR0]] +; CHECK-NEXT: [[POW23:%.*]] = fmul ninf nsz afn double [[CBRT]], [[CBRT]] +; CHECK-NEXT: ret double [[POW23]] +; + %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 0x3FE5555555555555) + ret double %r +} + +define <4 x float> @pow_v4f32_two_third_fmf(<4 x float> %x) nounwind { +; CHECK-LABEL: @pow_v4f32_two_third_fmf( +; CHECK-NEXT: [[CBRTL:%.*]] = tail call fast <4 x float> @cbrtl(<4 x float> [[X:%.*]]) #[[ATTR0]] +; CHECK-NEXT: [[POW23:%.*]] = fmul fast <4 x float> [[CBRTL]], [[CBRTL]] +; CHECK-NEXT: ret <4 x float> [[POW23]] +; + %a = fptrunc <4 x double> to <4 x float> + %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> %a) + ret <4 x float> %r +} + +define <2 x double> @pow_v2f64_two_third_fmf(<2 x double> %x) nounwind { +; CHECK-LABEL: @pow_v2f64_two_third_fmf( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[X:%.*]] to <4 x float> +; CHECK-NEXT: [[CBRTL:%.*]] = tail call <4 x float> @cbrtl(<4 x float> [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[CBRTL]] to <2 x double> +; CHECK-NEXT: [[POW23:%.*]] = fmul fast <2 x double> [[TMP2]], [[TMP2]] +; CHECK-NEXT: ret <2 x double> [[POW23]] +; + %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) + ret <2 x double> %r +} diff --git a/llvm/test/CodeGen/ARM/pow.75.ll b/llvm/test/CodeGen/ARM/pow.75.ll --- a/llvm/test/CodeGen/ARM/pow.75.ll +++ b/llvm/test/CodeGen/ARM/pow.75.ll @@ -1,5 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=thumbv8-linux-gnueabihf -mattr=neon -debug 2>&1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -O2 -S %s -mtriple=thumbv8-linux-gnueabihf -mattr=neon 2>&1 | FileCheck %s ; REQUIRES: asserts declare float @llvm.pow.f32(float, float) @@ -8,63 +8,45 @@ declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) define float @pow_f32_three_fourth_fmf(float %x) nounwind { -; CHECK: Combining: {{.*}}: f32 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01> -; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[X]] -; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[SQRT]] -; CHECK-NEXT: Creating new 
node: [[R:t[0-9]+]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] -; CHECK-NEXT: ... into: [[R]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] +; CHECK-LABEL: @pow_f32_three_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[SQRT]]) +; CHECK-NEXT: [[POW075:%.*]] = fmul ninf nsz afn float [[SQRT]], [[SQRT1]] +; CHECK-NEXT: ret float [[POW075]] +; %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 7.5e-01) ret float %r } define double @pow_f64_three_fourth_fmf(double %x) nounwind { -; CHECK: Combining: {{.*}}: f64 = fpow ninf nsz afn t2, ConstantFP:f64<7.500000e-01> -; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[X]] -; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[SQRT]] -; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] -; CHECK-NEXT: ... into: [[R]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] +; CHECK-LABEL: @pow_f64_three_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[SQRT]]) +; CHECK-NEXT: [[POW075:%.*]] = fmul ninf nsz afn double [[SQRT]], [[SQRT1]] +; CHECK-NEXT: ret double [[POW075]] +; %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 7.5e-01) ret double %r } define <4 x float> @pow_v4f32_three_fourth_fmf(<4 x float> %x) nounwind { -; CHECK: Combining: {{.*}}: v4f32 = BUILD_VECTOR [[FORTH:t[0-9]+]], [[THIRD:t[0-9]+]], [[SECOND:t[0-9]+]], [[FIRST:t[0-9]+]] -; CHECK: Combining: [[FIRST]]: f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01> -; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] -; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] -; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] -; CHECK-NEXT: ... into: [[R]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] -; CHECK: Combining: [[SECOND]]: f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01> -; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] -; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] -; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] -; CHECK-NEXT: ... into: [[R]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] -; CHECK: Combining: [[THIRD]]: f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01> -; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] -; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] -; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] -; CHECK-NEXT: ... 
into: [[R]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] -; CHECK: Combining: [[FORTH]]: f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01> -; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] -; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] -; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] -; CHECK-NEXT: ... into: [[R]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] +; CHECK-LABEL: @pow_v4f32_three_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[SQRT]]) +; CHECK-NEXT: [[POW075:%.*]] = fmul fast <4 x float> [[SQRT1]], [[SQRT]] +; CHECK-NEXT: ret <4 x float> [[POW075]] +; %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> ) ret <4 x float> %r } define <2 x double> @pow_v2f64_three_fourth_fmf(<2 x double> %x) nounwind { -; CHECK: Combining: {{.*}}: v2f64 = BUILD_VECTOR [[SECOND:t[0-9]+]], [[FIRST:t[0-9]+]] -; CHECK: Combining: [[FIRST]]: f64 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f64<7.500000e-01> -; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] -; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] -; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] -; CHECK-NEXT: ... into: [[R]]: f64 = fmul nnan ninf nsz arcp contract afn reassoc t16, t17 -; CHECK: Combining: [[SECOND]]: f64 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f64<7.500000e-01> -; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] -; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] -; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] -; CHECK-NEXT: ... 
into: [[R]]: f64 = fmul nnan ninf nsz arcp contract afn reassoc t19, t20 +; CHECK-LABEL: @pow_v2f64_three_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[SQRT]]) +; CHECK-NEXT: [[POW075:%.*]] = fmul fast <2 x double> [[SQRT1]], [[SQRT]] +; CHECK-NEXT: ret <2 x double> [[POW075]] +; %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %r } diff --git a/llvm/test/CodeGen/ARM/pow.ll b/llvm/test/CodeGen/ARM/pow.ll --- a/llvm/test/CodeGen/ARM/pow.ll +++ b/llvm/test/CodeGen/ARM/pow.ll @@ -1,6 +1,6 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi | FileCheck %s --check-prefixes=ANY,SOFTFLOAT -; RUN: llc < %s -mtriple=thumbv8-linux-gnueabihf -mattr=neon | FileCheck %s --check-prefixes=ANY,HARDFLOAT +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -O2 -S %s -mtriple=thumbv7m-linux-gnueabi | FileCheck %s --check-prefix=SOFTFLOAT +; RUN: opt -O2 -S %s -mtriple=thumbv8-linux-gnueabihf -mattr=neon | FileCheck %s --check-prefix=HARDFLOAT declare float @llvm.pow.f32(float, float) declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) @@ -9,99 +9,143 @@ declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) define float @pow_f32_one_fourth_fmf(float %x) nounwind { -; ANY-LABEL: pow_f32_one_fourth_fmf: -; SOFTFLOAT: bl powf -; HARDFLOAT: vsqrt.f32 -; HARDFLOAT: vsqrt.f32 +; SOFTFLOAT-LABEL: @pow_f32_one_fourth_fmf( +; SOFTFLOAT-NEXT: [[SQRT:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[X:%.*]]) +; SOFTFLOAT-NEXT: [[SQRT1:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[SQRT]]) +; SOFTFLOAT-NEXT: ret float [[SQRT1]] +; +; HARDFLOAT-LABEL: @pow_f32_one_fourth_fmf( +; HARDFLOAT-NEXT: [[SQRT:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[X:%.*]]) +; HARDFLOAT-NEXT: [[SQRT1:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[SQRT]]) +; HARDFLOAT-NEXT: ret float [[SQRT1]] +; %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 2.5e-01) ret float %r } define double @pow_f64_one_fourth_fmf(double %x) nounwind { -; ANY-LABEL: pow_f64_one_fourth_fmf: -; SOFTFLOAT: bl pow -; HARDFLOAT: vsqrt.f64 -; HARDFLOAT: vsqrt.f64 +; SOFTFLOAT-LABEL: @pow_f64_one_fourth_fmf( +; SOFTFLOAT-NEXT: [[SQRT:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[X:%.*]]) +; SOFTFLOAT-NEXT: [[SQRT1:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[SQRT]]) +; SOFTFLOAT-NEXT: ret double [[SQRT1]] +; +; HARDFLOAT-LABEL: @pow_f64_one_fourth_fmf( +; HARDFLOAT-NEXT: [[SQRT:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[X:%.*]]) +; HARDFLOAT-NEXT: [[SQRT1:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[SQRT]]) +; HARDFLOAT-NEXT: ret double [[SQRT1]] +; %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 2.5e-01) ret double %r } define float @pow_f32_one_third_fmf(float %x) nounwind { -; ANY-LABEL: pow_f32_one_third_fmf: -; SOFTFLOAT: bl cbrtf -; HARDFLOAT: b cbrtf +; SOFTFLOAT-LABEL: @pow_f32_one_third_fmf( +; SOFTFLOAT-NEXT: [[CBRTF:%.*]] = tail call fast float @cbrtf(float [[X:%.*]]) #[[ATTR1:[0-9]+]] +; SOFTFLOAT-NEXT: ret float [[CBRTF]] +; +; HARDFLOAT-LABEL: @pow_f32_one_third_fmf( +; HARDFLOAT-NEXT: [[CBRTF:%.*]] = tail call fast float @cbrtf(float [[X:%.*]]) #[[ATTR1:[0-9]+]] +; HARDFLOAT-NEXT: ret float 
[[CBRTF]] +; %r = call fast float @llvm.pow.f32(float %x, float 0x3FD5555560000000) ret float %r } define double @pow_f64_one_third_fmf(double %x) nounwind { -; ANY-LABEL: pow_f64_one_third_fmf: -; SOFTFLOAT: bl cbrt -; HARDFLOAT: b cbrt +; SOFTFLOAT-LABEL: @pow_f64_one_third_fmf( +; SOFTFLOAT-NEXT: [[CBRT:%.*]] = tail call fast double @cbrt(double [[X:%.*]]) #[[ATTR1]] +; SOFTFLOAT-NEXT: ret double [[CBRT]] +; +; HARDFLOAT-LABEL: @pow_f64_one_third_fmf( +; HARDFLOAT-NEXT: [[CBRT:%.*]] = tail call fast double @cbrt(double [[X:%.*]]) #[[ATTR1]] +; HARDFLOAT-NEXT: ret double [[CBRT]] +; %r = call fast double @llvm.pow.f64(double %x, double 0x3FD5555555555555) ret double %r } define <4 x float> @pow_v4f32_one_fourth_fmf(<4 x float> %x) nounwind { -; ANY-LABEL: pow_v4f32_one_fourth_fmf: -; SOFTFLOAT: bl powf -; SOFTFLOAT: bl powf -; SOFTFLOAT: bl powf -; SOFTFLOAT: bl powf -; HARDFLOAT: vsqrt.f32 -; HARDFLOAT: vsqrt.f32 -; HARDFLOAT: vsqrt.f32 -; HARDFLOAT: vsqrt.f32 -; HARDFLOAT: vsqrt.f32 -; HARDFLOAT: vsqrt.f32 -; HARDFLOAT: vsqrt.f32 -; HARDFLOAT: vsqrt.f32 +; SOFTFLOAT-LABEL: @pow_v4f32_one_fourth_fmf( +; SOFTFLOAT-NEXT: [[SQRT:%.*]] = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]]) +; SOFTFLOAT-NEXT: [[SQRT1:%.*]] = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[SQRT]]) +; SOFTFLOAT-NEXT: ret <4 x float> [[SQRT1]] +; +; HARDFLOAT-LABEL: @pow_v4f32_one_fourth_fmf( +; HARDFLOAT-NEXT: [[SQRT:%.*]] = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]]) +; HARDFLOAT-NEXT: [[SQRT1:%.*]] = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[SQRT]]) +; HARDFLOAT-NEXT: ret <4 x float> [[SQRT1]] +; %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> ) ret <4 x float> %r } define <2 x double> @pow_v2f64_one_fourth_fmf(<2 x double> %x) nounwind { -; ANY-LABEL: pow_v2f64_one_fourth_fmf: -; SOFTFLOAT: bl pow -; SOFTFLOAT: bl pow -; HARDFLOAT: vsqrt.f64 -; HARDFLOAT: vsqrt.f64 -; HARDFLOAT: vsqrt.f64 -; HARDFLOAT: vsqrt.f64 +; SOFTFLOAT-LABEL: @pow_v2f64_one_fourth_fmf( +; SOFTFLOAT-NEXT: [[SQRT:%.*]] = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) +; SOFTFLOAT-NEXT: [[SQRT1:%.*]] = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[SQRT]]) +; SOFTFLOAT-NEXT: ret <2 x double> [[SQRT1]] +; +; HARDFLOAT-LABEL: @pow_v2f64_one_fourth_fmf( +; HARDFLOAT-NEXT: [[SQRT:%.*]] = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) +; HARDFLOAT-NEXT: [[SQRT1:%.*]] = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[SQRT]]) +; HARDFLOAT-NEXT: ret <2 x double> [[SQRT1]] +; %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %r } define float @pow_f32_one_fourth_not_enough_fmf(float %x) nounwind { -; ANY-LABEL: pow_f32_one_fourth_not_enough_fmf: -; SOFTFLOAT: bl powf -; HARDFLOAT: b powf +; SOFTFLOAT-LABEL: @pow_f32_one_fourth_not_enough_fmf( +; SOFTFLOAT-NEXT: [[SQRT:%.*]] = tail call ninf afn float @llvm.sqrt.f32(float [[X:%.*]]) +; SOFTFLOAT-NEXT: [[SQRT1:%.*]] = tail call ninf afn float @llvm.sqrt.f32(float [[SQRT]]) +; SOFTFLOAT-NEXT: ret float [[SQRT1]] +; +; HARDFLOAT-LABEL: @pow_f32_one_fourth_not_enough_fmf( +; HARDFLOAT-NEXT: [[SQRT:%.*]] = tail call ninf afn float @llvm.sqrt.f32(float [[X:%.*]]) +; HARDFLOAT-NEXT: [[SQRT1:%.*]] = tail call ninf afn float @llvm.sqrt.f32(float [[SQRT]]) +; HARDFLOAT-NEXT: ret float [[SQRT1]] +; %r = call afn ninf float @llvm.pow.f32(float %x, float 2.5e-01) ret float %r } define double 
@pow_f64_one_fourth_not_enough_fmf(double %x) nounwind { -; ANY-LABEL: pow_f64_one_fourth_not_enough_fmf: -; SOFTFLOAT: bl pow -; HARDFLOAT: b pow +; SOFTFLOAT-LABEL: @pow_f64_one_fourth_not_enough_fmf( +; SOFTFLOAT-NEXT: [[R:%.*]] = tail call ninf nsz double @llvm.pow.f64(double [[X:%.*]], double 2.500000e-01) +; SOFTFLOAT-NEXT: ret double [[R]] +; +; HARDFLOAT-LABEL: @pow_f64_one_fourth_not_enough_fmf( +; HARDFLOAT-NEXT: [[R:%.*]] = tail call ninf nsz double @llvm.pow.f64(double [[X:%.*]], double 2.500000e-01) +; HARDFLOAT-NEXT: ret double [[R]] +; %r = call nsz ninf double @llvm.pow.f64(double %x, double 2.5e-01) ret double %r } define <4 x float> @pow_v4f32_one_fourth_not_enough_fmf(<4 x float> %x) nounwind { -; ANY-LABEL: pow_v4f32_one_fourth_not_enough_fmf: -; ANY: bl powf -; ANY: bl powf -; ANY: bl powf -; ANY: bl powf +; SOFTFLOAT-LABEL: @pow_v4f32_one_fourth_not_enough_fmf( +; SOFTFLOAT-NEXT: [[SQRT:%.*]] = tail call nsz afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]]) +; SOFTFLOAT-NEXT: [[SQRT1:%.*]] = tail call nsz afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[SQRT]]) +; SOFTFLOAT-NEXT: ret <4 x float> [[SQRT1]] +; +; HARDFLOAT-LABEL: @pow_v4f32_one_fourth_not_enough_fmf( +; HARDFLOAT-NEXT: [[SQRT:%.*]] = tail call nsz afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]]) +; HARDFLOAT-NEXT: [[SQRT1:%.*]] = tail call nsz afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[SQRT]]) +; HARDFLOAT-NEXT: ret <4 x float> [[SQRT1]] +; %r = call afn nsz <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> ) ret <4 x float> %r } define <2 x double> @pow_v2f64_one_fourth_not_enough_fmf(<2 x double> %x) nounwind { -; ANY-LABEL: pow_v2f64_one_fourth_not_enough_fmf: -; ANY: bl pow -; ANY: bl pow +; SOFTFLOAT-LABEL: @pow_v2f64_one_fourth_not_enough_fmf( +; SOFTFLOAT-NEXT: [[R:%.*]] = tail call reassoc nnan nsz <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> ) +; SOFTFLOAT-NEXT: ret <2 x double> [[R]] +; +; HARDFLOAT-LABEL: @pow_v2f64_one_fourth_not_enough_fmf( +; HARDFLOAT-NEXT: [[R:%.*]] = tail call reassoc nnan nsz <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> ) +; HARDFLOAT-NEXT: ret <2 x double> [[R]] +; %r = call nsz nnan reassoc <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %r } diff --git a/llvm/test/CodeGen/PowerPC/pow.75.ll b/llvm/test/CodeGen/PowerPC/pow.75.ll deleted file mode 100644 --- a/llvm/test/CodeGen/PowerPC/pow.75.ll +++ /dev/null @@ -1,48 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -debug 2>&1 | FileCheck %s -; REQUIRES: asserts - -declare float @llvm.pow.f32(float, float) -declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) -declare double @llvm.pow.f64(double, double) -declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) - -define float @pow_f32_three_fourth_fmf(float %x) nounwind { -; CHECK: Combining: {{.*}}: f32 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01> -; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[X]] -; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[SQRT]] -; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] -; CHECK-NEXT: ... 
into: [[R]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] - %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 7.5e-01) - ret float %r -} - -define double @pow_f64_three_fourth_fmf(double %x) nounwind { -; CHECK: Combining: {{.*}}: f64 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f64<7.500000e-01> -; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[X]] -; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[SQRT]] -; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] -; CHECK-NEXT: ... into: [[R]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] - %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 7.5e-01) - ret double %r -} - -define <4 x float> @pow_v4f32_three_fourth_fmf(<4 x float> %x) nounwind { -; CHECK: Combining: {{.*}}: v4f32 = fpow ninf afn [[X:t[0-9]+]], {{.*}} -; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: v4f32 = fsqrt ninf afn [[X]] -; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: v4f32 = fsqrt ninf afn [[SQRT]] -; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: v4f32 = fmul ninf afn [[SQRT]], [[SQRTSQRT]] -; CHECK-NEXT: ... into: [[R]]: v4f32 = fmul ninf afn [[SQRT]], [[SQRTSQRT]] - %r = call ninf afn <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> ) - ret <4 x float> %r -} - -define <2 x double> @pow_v2f64_three_fourth_fmf(<2 x double> %x) nounwind { -; CHECK: Combining: {{.*}}: v2f64 = fpow ninf afn [[X:t[0-9]+]], {{.*}} -; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: v2f64 = fsqrt ninf afn [[X]] -; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: v2f64 = fsqrt ninf afn [[SQRT]] -; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: v2f64 = fmul ninf afn [[SQRT]], [[SQRTSQRT]] -; CHECK-NEXT: ... into: [[R]]: v2f64 = fmul ninf afn [[SQRT]], [[SQRTSQRT]] - %r = call ninf afn <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) - ret <2 x double> %r -} diff --git a/llvm/test/CodeGen/PowerPC/pow.ll b/llvm/test/CodeGen/PowerPC/pow.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pow.ll @@ -0,0 +1,173 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -O2 -S -mtriple=powerpc64le-unknown-unknown %s 2>&1 | FileCheck %s +; REQUIRES: asserts + +declare float @llvm.pow.f32(float, float) +declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) +declare double @llvm.pow.f64(double, double) +declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) + +define float @pow_f32_one_fourth_fmf(float %x) nounwind { +; CHECK-LABEL: @pow_f32_one_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[SQRT]]) +; CHECK-NEXT: ret float [[SQRT1]] +; + %r = call ninf nsz afn float @llvm.pow.f32(float %x, float 2.5e-01) + ret float %r +} + +define double @pow_f64_one_fourth_fmf(double %x) nounwind { +; CHECK-LABEL: @pow_f64_one_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[SQRT]]) +; CHECK-NEXT: ret double [[SQRT1]] +; + %r = call ninf nsz afn double @llvm.pow.f64(double %x, double 2.5e-01) + ret double %r +} + +define <4 x float> @pow_v4f32_one_fourth_fmf(<4 x float> %x) nounwind { +; CHECK-LABEL: @pow_v4f32_one_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call ninf nsz afn <4 x float> @llvm.sqrt.v4f32(<4 x float> 
[[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call ninf nsz afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[SQRT]]) +; CHECK-NEXT: ret <4 x float> [[SQRT1]] +; + %r = call ninf nsz afn <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> ) + ret <4 x float> %r +} + +define <2 x double> @pow_v2f64_one_fourth_fmf(<2 x double> %x) nounwind { +; CHECK-LABEL: @pow_v2f64_one_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call ninf nsz afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call ninf nsz afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[SQRT]]) +; CHECK-NEXT: ret <2 x double> [[SQRT1]] +; + %r = call ninf nsz afn <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) + ret <2 x double> %r +} + +define float @pow_f32_three_fourth_fmf(float %x) nounwind { +; CHECK-LABEL: @pow_f32_three_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[SQRT]]) +; CHECK-NEXT: [[POW075:%.*]] = fmul ninf nsz afn float [[SQRT]], [[SQRT1]] +; CHECK-NEXT: ret float [[POW075]] +; + %r = call ninf nsz afn float @llvm.pow.f32(float %x, float 7.5e-01) + ret float %r +} + +define double @pow_f64_three_fourth_fmf(double %x) nounwind { +; CHECK-LABEL: @pow_f64_three_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[SQRT]]) +; CHECK-NEXT: [[POW075:%.*]] = fmul ninf nsz afn double [[SQRT]], [[SQRT1]] +; CHECK-NEXT: ret double [[POW075]] +; + %r = call ninf nsz afn double @llvm.pow.f64(double %x, double 7.5e-01) + ret double %r +} + +define <4 x float> @pow_v4f32_three_fourth_fmf(<4 x float> %x) nounwind { +; CHECK-LABEL: @pow_v4f32_three_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call ninf nsz afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call ninf nsz afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[SQRT]]) +; CHECK-NEXT: [[POW075:%.*]] = fmul ninf nsz afn <4 x float> [[SQRT]], [[SQRT1]] +; CHECK-NEXT: ret <4 x float> [[POW075]] +; + %r = call ninf nsz afn <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> ) + ret <4 x float> %r +} + +define <2 x double> @pow_v2f64_three_fourth_fmf(<2 x double> %x) nounwind { +; CHECK-LABEL: @pow_v2f64_three_fourth_fmf( +; CHECK-NEXT: [[SQRT:%.*]] = tail call ninf nsz afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) +; CHECK-NEXT: [[SQRT1:%.*]] = tail call ninf nsz afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[SQRT]]) +; CHECK-NEXT: [[POW075:%.*]] = fmul ninf nsz afn <2 x double> [[SQRT]], [[SQRT1]] +; CHECK-NEXT: ret <2 x double> [[POW075]] +; + %r = call ninf nsz afn <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) + ret <2 x double> %r +} + +define float @pow_f32_one_third_fmf(float %x) nounwind { +; CHECK-LABEL: @pow_f32_one_third_fmf( +; CHECK-NEXT: [[CBRTF:%.*]] = tail call ninf nsz afn float @cbrtf(float [[X:%.*]]) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: ret float [[CBRTF]] +; + %r = call ninf nsz afn float @llvm.pow.f32(float %x, float 0x3FD5555560000000) + ret float %r +} + +define double @pow_f64_one_third_fmf(double %x) nounwind { +; CHECK-LABEL: @pow_f64_one_third_fmf( +; CHECK-NEXT: [[CBRT:%.*]] = tail call ninf nsz afn double @cbrt(double [[X:%.*]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[CBRT]] +; + %r = call ninf nsz afn double @llvm.pow.f64(double %x, double 0x3FD5555555555555) + 
ret double %r
+}
+
+define <4 x float> @pow_v4f32_one_third_fmf(<4 x float> %x) nounwind {
+; CHECK-LABEL: @pow_v4f32_one_third_fmf(
+; CHECK-NEXT:    [[CBRTL:%.*]] = tail call ninf nsz afn <4 x float> @cbrtl(<4 x float> [[X:%.*]]) #[[ATTR0]]
+; CHECK-NEXT:    ret <4 x float> [[CBRTL]]
+;
+  %r = call ninf nsz afn <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 0x3FD5555560000000, float 0x3FD5555560000000, float 0x3FD5555560000000, float 0x3FD5555560000000>)
+  ret <4 x float> %r
+}
+
+define <2 x double> @pow_v2f64_one_third_fmf(<2 x double> %x) nounwind {
+; CHECK-LABEL: @pow_v2f64_one_third_fmf(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x double> [[X:%.*]] to <4 x float>
+; CHECK-NEXT:    [[CBRTL:%.*]] = tail call <4 x float> @cbrtl(<4 x float> [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[CBRTL]] to <2 x double>
+; CHECK-NEXT:    ret <2 x double> [[TMP2]]
+;
+  %r = call ninf nsz afn <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 0x3FD5555555555555, double 0x3FD5555555555555>)
+  ret <2 x double> %r
+}
+
+define float @pow_f32_two_third_fmf(float %x) nounwind {
+; CHECK-LABEL: @pow_f32_two_third_fmf(
+; CHECK-NEXT:    [[CBRTF:%.*]] = tail call ninf nsz afn float @cbrtf(float [[X:%.*]]) #[[ATTR0]]
+; CHECK-NEXT:    [[POW23:%.*]] = fmul ninf nsz afn float [[CBRTF]], [[CBRTF]]
+; CHECK-NEXT:    ret float [[POW23]]
+;
+  %r = call ninf nsz afn float @llvm.pow.f32(float %x, float 0x3FE5555560000000)
+  ret float %r
+}
+
+define double @pow_f64_two_third_fmf(double %x) nounwind {
+; CHECK-LABEL: @pow_f64_two_third_fmf(
+; CHECK-NEXT:    [[CBRT:%.*]] = tail call ninf nsz afn double @cbrt(double [[X:%.*]]) #[[ATTR0]]
+; CHECK-NEXT:    [[POW23:%.*]] = fmul ninf nsz afn double [[CBRT]], [[CBRT]]
+; CHECK-NEXT:    ret double [[POW23]]
+;
+  %r = call ninf nsz afn double @llvm.pow.f64(double %x, double 0x3FE5555555555555)
+  ret double %r
+}
+
+define <4 x float> @pow_v4f32_two_third_fmf(<4 x float> %x) nounwind {
+; CHECK-LABEL: @pow_v4f32_two_third_fmf(
+; CHECK-NEXT:    [[CBRTL:%.*]] = tail call ninf nsz afn <4 x float> @cbrtl(<4 x float> [[X:%.*]]) #[[ATTR0]]
+; CHECK-NEXT:    [[POW23:%.*]] = fmul ninf nsz afn <4 x float> [[CBRTL]], [[CBRTL]]
+; CHECK-NEXT:    ret <4 x float> [[POW23]]
+;
+  %r = call ninf nsz afn <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 0x3FE5555560000000, float 0x3FE5555560000000, float 0x3FE5555560000000, float 0x3FE5555560000000>)
+  ret <4 x float> %r
+}
+
+define <2 x double> @pow_v2f64_two_third_fmf(<2 x double> %x) nounwind {
+; CHECK-LABEL: @pow_v2f64_two_third_fmf(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x double> [[X:%.*]] to <4 x float>
+; CHECK-NEXT:    [[CBRTL:%.*]] = tail call <4 x float> @cbrtl(<4 x float> [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[CBRTL]] to <2 x double>
+; CHECK-NEXT:    [[POW23:%.*]] = fmul ninf nsz afn <2 x double> [[TMP2]], [[TMP2]]
+; CHECK-NEXT:    ret <2 x double> [[POW23]]
+;
+  %r = call ninf nsz afn <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 0x3FE5555555555555, double 0x3FE5555555555555>)
+  ret <2 x double> %r
+}
+
diff --git a/llvm/test/CodeGen/WebAssembly/libcalls.ll b/llvm/test/CodeGen/WebAssembly/libcalls.ll
--- a/llvm/test/CodeGen/WebAssembly/libcalls.ll
+++ b/llvm/test/CodeGen/WebAssembly/libcalls.ll
@@ -72,7 +72,7 @@
   %e = call double @llvm.log.f64(double %d)
   ; CHECK: call $push{{[0-9]}}=, exp
   %f = call double @llvm.exp.f64(double %e)
-  ; CHECK: call $push{{[0-9]}}=, cbrt
+  ; CHECK: call $push{{[0-9]}}=, pow
   %g = call fast double @llvm.pow.f64(double %f, double 0x3FD5555555555555)
   ; CHECK: call $push{{[0-9]}}=, lround
   %h = call i32 @llvm.lround(double %g)
diff --git a/llvm/test/CodeGen/X86/pow.75.ll b/llvm/test/CodeGen/X86/pow.75.ll
--- a/llvm/test/CodeGen/X86/pow.75.ll
+++ b/llvm/test/CodeGen/X86/pow.75.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-- -debug 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -O2 -S %s -mtriple=x86_64-- 2>&1 | FileCheck %s
 ; REQUIRES: asserts

 declare float @llvm.pow.f32(float, float)
@@ -7,41 +8,45 @@
 declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)

 define float @pow_f32_three_fourth_fmf(float %x) nounwind {
-; CHECK: Combining: {{.*}}: f32 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01>
-; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[X]]
-; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[SQRT]]
-; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
-; CHECK-NEXT: ... into: [[R]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
+; CHECK-LABEL: @pow_f32_three_fourth_fmf(
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[X:%.*]])
+; CHECK-NEXT:    [[SQRT1:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[SQRT]])
+; CHECK-NEXT:    [[POW075:%.*]] = fmul ninf nsz afn float [[SQRT]], [[SQRT1]]
+; CHECK-NEXT:    ret float [[POW075]]
+;
   %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 7.5e-01)
   ret float %r
 }

 define double @pow_f64_three_fourth_fmf(double %x) nounwind {
-; CHECK: Combining: {{.*}}: f64 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f64<7.500000e-01>
-; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[X]]
-; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[SQRT]]
-; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
-; CHECK-NEXT: ... into: [[R]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
+; CHECK-LABEL: @pow_f64_three_fourth_fmf(
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT:    [[SQRT1:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[SQRT]])
+; CHECK-NEXT:    [[POW075:%.*]] = fmul ninf nsz afn double [[SQRT]], [[SQRT1]]
+; CHECK-NEXT:    ret double [[POW075]]
+;
   %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 7.5e-01)
   ret double %r
 }

 define <4 x float> @pow_v4f32_three_fourth_fmf(<4 x float> %x) nounwind {
-; CHECK: Combining: {{.*}}: v4f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], {{.*}}
-; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: v4f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
-; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: v4f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
-; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: v4f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
-; CHECK-NEXT: ... into: [[R]]: v4f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+; CHECK-LABEL: @pow_v4f32_three_fourth_fmf(
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]])
+; CHECK-NEXT:    [[SQRT1:%.*]] = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[SQRT]])
+; CHECK-NEXT:    [[POW075:%.*]] = fmul fast <4 x float> [[SQRT1]], [[SQRT]]
+; CHECK-NEXT:    ret <4 x float> [[POW075]]
+;
   %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 7.5e-01, float 7.5e-01, float 7.5e-01, float 7.5e-01>)
   ret <4 x float> %r
 }

 define <2 x double> @pow_v2f64_three_fourth_fmf(<2 x double> %x) nounwind {
-; CHECK: Combining: {{.*}}: v2f64 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], {{.*}}
-; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: v2f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
-; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: v2f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
-; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: v2f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
-; CHECK-NEXT: ... into: [[R]]: v2f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
+; CHECK-LABEL: @pow_v2f64_three_fourth_fmf(
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]])
+; CHECK-NEXT:    [[SQRT1:%.*]] = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[SQRT]])
+; CHECK-NEXT:    [[POW075:%.*]] = fmul fast <2 x double> [[SQRT1]], [[SQRT]]
+; CHECK-NEXT:    ret <2 x double> [[POW075]]
+;
   %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 7.5e-01, double 7.5e-01>)
   ret <2 x double> %r
 }
diff --git a/llvm/test/CodeGen/X86/pow.ll b/llvm/test/CodeGen/X86/pow.ll
--- a/llvm/test/CodeGen/X86/pow.ll
+++ b/llvm/test/CodeGen/X86/pow.ll
@@ -1,5 +1,5 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -O2 -S %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s

 declare float @llvm.pow.f32(float, float)
 declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
@@ -10,203 +10,98 @@
 declare x86_fp80 @llvm.pow.f80(x86_fp80, x86_fp80)

 define float @pow_f32_one_fourth_fmf_ieee(float %x) nounwind {
-; CHECK-LABEL: pow_f32_one_fourth_fmf_ieee:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    rsqrtss %xmm0, %xmm1
-; CHECK-NEXT:    movaps %xmm0, %xmm3
-; CHECK-NEXT:    mulss %xmm1, %xmm3
-; CHECK-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT:    movaps %xmm3, %xmm4
-; CHECK-NEXT:    mulss %xmm2, %xmm4
-; CHECK-NEXT:    mulss %xmm1, %xmm3
-; CHECK-NEXT:    movss {{.*#+}} xmm5 = mem[0],zero,zero,zero
-; CHECK-NEXT:    addss %xmm5, %xmm3
-; CHECK-NEXT:    mulss %xmm4, %xmm3
-; CHECK-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
-; CHECK-NEXT:    andps %xmm1, %xmm0
-; CHECK-NEXT:    movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
-; CHECK-NEXT:    cmpltss %xmm4, %xmm0
-; CHECK-NEXT:    andnps %xmm3, %xmm0
-; CHECK-NEXT:    xorps %xmm3, %xmm3
-; CHECK-NEXT:    rsqrtss %xmm0, %xmm3
-; CHECK-NEXT:    andps %xmm0, %xmm1
-; CHECK-NEXT:    mulss %xmm3, %xmm0
-; CHECK-NEXT:    mulss %xmm0, %xmm2
-; CHECK-NEXT:    mulss %xmm3, %xmm0
-; CHECK-NEXT:    addss %xmm5, %xmm0
-; CHECK-NEXT:    mulss %xmm2, %xmm0
-; CHECK-NEXT:    cmpltss %xmm4, %xmm1
-; CHECK-NEXT:    andnps %xmm0, %xmm1
-; CHECK-NEXT:    movaps %xmm1, %xmm0
-; CHECK-NEXT:    retq
+; CHECK-LABEL: @pow_f32_one_fourth_fmf_ieee(
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[X:%.*]])
+; CHECK-NEXT:    [[SQRT1:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[SQRT]])
+; CHECK-NEXT:    ret float [[SQRT1]]
+;
   %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 2.5e-01)
   ret float %r
 }

 define float @pow_f32_one_fourth_fmf_daz(float %x) #0 {
-; CHECK-LABEL: pow_f32_one_fourth_fmf_daz:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    rsqrtss %xmm0, %xmm1
-; CHECK-NEXT:    movaps %xmm0, %xmm2
-; CHECK-NEXT:    mulss %xmm1, %xmm2
-; CHECK-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT:    movaps %xmm2, %xmm4
-; CHECK-NEXT:    mulss %xmm3, %xmm4
-; CHECK-NEXT:    mulss %xmm1, %xmm2
-; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT:    addss %xmm1, %xmm2
-; CHECK-NEXT:    mulss %xmm4, %xmm2
-; CHECK-NEXT:    xorps %xmm4, %xmm4
-; CHECK-NEXT:    cmpeqss %xmm4, %xmm0
-; CHECK-NEXT:    andnps %xmm2, %xmm0
-; CHECK-NEXT:    xorps %xmm2, %xmm2
-; CHECK-NEXT:    rsqrtss %xmm0, %xmm2
-; CHECK-NEXT:    movaps %xmm0, %xmm5
-; CHECK-NEXT:    mulss %xmm2, %xmm5
-; CHECK-NEXT:    mulss %xmm5, %xmm3
-; CHECK-NEXT:    mulss %xmm2, %xmm5
-; CHECK-NEXT:    addss %xmm1, %xmm5
-; CHECK-NEXT:    mulss %xmm3, %xmm5
-; CHECK-NEXT:    cmpeqss %xmm4, %xmm0
-; CHECK-NEXT:    andnps %xmm5, %xmm0
-; CHECK-NEXT:    retq
+; CHECK-LABEL: @pow_f32_one_fourth_fmf_daz(
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[X:%.*]])
+; CHECK-NEXT:    [[SQRT1:%.*]] = tail call ninf nsz afn float @llvm.sqrt.f32(float [[SQRT]])
+; CHECK-NEXT:    ret float [[SQRT1]]
+;
   %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 2.5e-01)
   ret float %r
 }

 define double @pow_f64_one_fourth_fmf(double %x) nounwind {
-; CHECK-LABEL: pow_f64_one_fourth_fmf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    sqrtsd %xmm0, %xmm0
-; CHECK-NEXT:    sqrtsd %xmm0, %xmm0
-; CHECK-NEXT:    retq
+; CHECK-LABEL: @pow_f64_one_fourth_fmf(
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT:    [[SQRT1:%.*]] = tail call ninf nsz afn double @llvm.sqrt.f64(double [[SQRT]])
+; CHECK-NEXT:    ret double [[SQRT1]]
+;
   %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 2.5e-01)
   ret double %r
 }

 define <4 x float> @pow_v4f32_one_fourth_fmf(<4 x float> %x) nounwind {
-; CHECK-LABEL: pow_v4f32_one_fourth_fmf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    rsqrtps %xmm0, %xmm1
-; CHECK-NEXT:    movaps %xmm0, %xmm2
-; CHECK-NEXT:    mulps %xmm1, %xmm2
-; CHECK-NEXT:    movaps {{.*#+}} xmm3 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; CHECK-NEXT:    movaps %xmm2, %xmm4
-; CHECK-NEXT:    mulps %xmm3, %xmm4
-; CHECK-NEXT:    mulps %xmm1, %xmm2
-; CHECK-NEXT:    movaps {{.*#+}} xmm5 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
-; CHECK-NEXT:    addps %xmm5, %xmm2
-; CHECK-NEXT:    mulps %xmm4, %xmm2
-; CHECK-NEXT:    movaps {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN]
-; CHECK-NEXT:    andps %xmm4, %xmm0
-; CHECK-NEXT:    movaps {{.*#+}} xmm1 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
-; CHECK-NEXT:    movaps %xmm1, %xmm6
-; CHECK-NEXT:    cmpleps %xmm0, %xmm6
-; CHECK-NEXT:    andps %xmm2, %xmm6
-; CHECK-NEXT:    rsqrtps %xmm6, %xmm0
-; CHECK-NEXT:    movaps %xmm6, %xmm2
-; CHECK-NEXT:    mulps %xmm0, %xmm2
-; CHECK-NEXT:    mulps %xmm2, %xmm3
-; CHECK-NEXT:    mulps %xmm0, %xmm2
-; CHECK-NEXT:    addps %xmm5, %xmm2
-; CHECK-NEXT:    mulps %xmm3, %xmm2
-; CHECK-NEXT:    andps %xmm4, %xmm6
-; CHECK-NEXT:    cmpleps %xmm6, %xmm1
-; CHECK-NEXT:    andps %xmm2, %xmm1
-; CHECK-NEXT:    movaps %xmm1, %xmm0
-; CHECK-NEXT:    retq
+; CHECK-LABEL: @pow_v4f32_one_fourth_fmf(
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]])
+; CHECK-NEXT:    [[SQRT1:%.*]] = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[SQRT]])
+; CHECK-NEXT:    ret <4 x float> [[SQRT1]]
+;
   %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 2.5e-01, float 2.5e-01, float 2.5e-01, float 2.5e-01>)
   ret <4 x float> %r
 }

 define <2 x double> @pow_v2f64_one_fourth_fmf(<2 x double> %x) nounwind {
-; CHECK-LABEL: pow_v2f64_one_fourth_fmf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    sqrtpd %xmm0, %xmm0
-; CHECK-NEXT:    sqrtpd %xmm0, %xmm0
-; CHECK-NEXT:    retq
+; CHECK-LABEL: @pow_v2f64_one_fourth_fmf(
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]])
+; CHECK-NEXT:    [[SQRT1:%.*]] = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[SQRT]])
+; CHECK-NEXT:    ret <2 x double> [[SQRT1]]
+;
   %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 2.5e-01, double 2.5e-01>)
   ret <2 x double> %r
 }

 define float @pow_f32_one_fourth_not_enough_fmf(float %x) nounwind {
-; CHECK-LABEL: pow_f32_one_fourth_not_enough_fmf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT:    jmp powf@PLT # TAILCALL
+; CHECK-LABEL: @pow_f32_one_fourth_not_enough_fmf(
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call ninf afn float @llvm.sqrt.f32(float [[X:%.*]])
+; CHECK-NEXT:    [[SQRT1:%.*]] = tail call ninf afn float @llvm.sqrt.f32(float [[SQRT]])
+; CHECK-NEXT:    ret float [[SQRT1]]
+;
   %r = call afn ninf float @llvm.pow.f32(float %x, float 2.5e-01)
   ret float %r
 }

 define double @pow_f64_one_fourth_not_enough_fmf(double %x) nounwind {
-; CHECK-LABEL: pow_f64_one_fourth_not_enough_fmf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    jmp pow@PLT # TAILCALL
+; CHECK-LABEL: @pow_f64_one_fourth_not_enough_fmf(
+; CHECK-NEXT:    [[R:%.*]] = tail call ninf nsz double @llvm.pow.f64(double [[X:%.*]], double 2.500000e-01)
+; CHECK-NEXT:    ret double [[R]]
+;
   %r = call nsz ninf double @llvm.pow.f64(double %x, double 2.5e-01)
   ret double %r
 }

 define <4 x float> @pow_v4f32_one_fourth_not_enough_fmf(<4 x float> %x) nounwind {
-; CHECK-LABEL: pow_v4f32_one_fourth_not_enough_fmf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    subq $56, %rsp
-; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT:    callq powf@PLT
-; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT:    callq powf@PLT
-; CHECK-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
-; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT:    callq powf@PLT
-; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT:    callq powf@PLT
-; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT:    unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT:    movaps %xmm1, %xmm0
-; CHECK-NEXT:    addq $56, %rsp
-; CHECK-NEXT:    retq
+; CHECK-LABEL: @pow_v4f32_one_fourth_not_enough_fmf(
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call nsz afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]])
+; CHECK-NEXT:    [[SQRT1:%.*]] = tail call nsz afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[SQRT]])
+; CHECK-NEXT:    ret <4 x float> [[SQRT1]]
+;
   %r = call afn nsz <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 2.5e-01, float 2.5e-01, float 2.5e-01, float 2.5e-01>)
   ret <4 x float> %r
 }

 define <2 x double> @pow_v2f64_one_fourth_not_enough_fmf(<2 x double> %x) nounwind {
-; CHECK-LABEL: pow_v2f64_one_fourth_not_enough_fmf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    subq $40, %rsp
-; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    callq pow@PLT
-; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    callq pow@PLT
-; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; CHECK-NEXT:    movaps %xmm1, %xmm0
-; CHECK-NEXT:    addq $40, %rsp
-; CHECK-NEXT:    retq
+; CHECK-LABEL: @pow_v2f64_one_fourth_not_enough_fmf(
+; CHECK-NEXT:    [[R:%.*]] = tail call reassoc nnan nsz <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> <double 2.500000e-01, double 2.500000e-01>)
+; CHECK-NEXT:    ret <2 x double> [[R]]
+;
   %r = call nsz nnan reassoc <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 2.5e-01, double 2.5e-01>)
   ret <2 x double> %r
 }

 define float @pow_f32_one_third_fmf(float %x) nounwind {
-; CHECK-LABEL: pow_f32_one_third_fmf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    jmp cbrtf@PLT # TAILCALL
+; CHECK-LABEL: @pow_f32_one_third_fmf(
+; CHECK-NEXT:    [[CBRTF:%.*]] = tail call nnan ninf nsz afn float @cbrtf(float [[X:%.*]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:    ret float [[CBRTF]]
+;
   %one = uitofp i32 1 to float
   %three = uitofp i32 3 to float
   %exp = fdiv float %one, %three
@@ -215,9 +110,10 @@
 }

 define double @pow_f64_one_third_fmf(double %x) nounwind {
-; CHECK-LABEL: pow_f64_one_third_fmf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    jmp cbrt@PLT # TAILCALL
+; CHECK-LABEL: @pow_f64_one_third_fmf(
+; CHECK-NEXT:    [[CBRT:%.*]] = tail call nnan ninf nsz afn double @cbrt(double [[X:%.*]]) #[[ATTR1]]
+; CHECK-NEXT:    ret double [[CBRT]]
+;
   %one = uitofp i32 1 to double
   %three = uitofp i32 3 to double
   %exp = fdiv double %one, %three
@@ -228,16 +124,10 @@

 ; TODO: We could turn this into cbrtl, but currently we only handle float/double types.
 define x86_fp80 @pow_f80_one_third_fmf(x86_fp80 %x) nounwind {
-; CHECK-LABEL: pow_f80_one_third_fmf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    subq $40, %rsp
-; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    fstpt (%rsp)
-; CHECK-NEXT:    callq powl@PLT
-; CHECK-NEXT:    addq $40, %rsp
-; CHECK-NEXT:    retq
+; CHECK-LABEL: @pow_f80_one_third_fmf(
+; CHECK-NEXT:    [[CBRTL:%.*]] = tail call nnan ninf nsz afn x86_fp80 @cbrtl(x86_fp80 [[X:%.*]]) #[[ATTR1]]
+; CHECK-NEXT:    ret x86_fp80 [[CBRTL]]
+;
   %one = uitofp i32 1 to x86_fp80
   %three = uitofp i32 3 to x86_fp80
   %exp = fdiv x86_fp80 %one, %three
@@ -248,10 +138,10 @@

 ; We might want to allow this. The exact hex value for 1/3 as a double is 0x3fd5555555555555.
 define double @pow_f64_not_exactly_one_third_fmf(double %x) nounwind {
-; CHECK-LABEL: pow_f64_not_exactly_one_third_fmf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    jmp pow@PLT # TAILCALL
+; CHECK-LABEL: @pow_f64_not_exactly_one_third_fmf(
+; CHECK-NEXT:    [[CBRT:%.*]] = tail call nnan ninf nsz afn double @cbrt(double [[X:%.*]]) #[[ATTR1]]
+; CHECK-NEXT:    ret double [[CBRT]]
+;
   %r = call nsz nnan ninf afn double @llvm.pow.f64(double %x, double 0x3fd5555555555556)
   ret double %r
 }
@@ -259,10 +149,10 @@

 ; We require all 4 of nsz, ninf, nnan, afn.
 define double @pow_f64_not_enough_fmf(double %x) nounwind {
-; CHECK-LABEL: pow_f64_not_enough_fmf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    jmp pow@PLT # TAILCALL
+; CHECK-LABEL: @pow_f64_not_enough_fmf(
+; CHECK-NEXT:    [[CBRT:%.*]] = tail call ninf nsz afn double @cbrt(double [[X:%.*]]) #[[ATTR1]]
+; CHECK-NEXT:    ret double [[CBRT]]
+;
   %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 0x3fd5555555555555)
   ret double %r
 }
diff --git a/llvm/test/Transforms/InstCombine/pow-cbrt.ll b/llvm/test/Transforms/InstCombine/pow-cbrt.ll
--- a/llvm/test/Transforms/InstCombine/pow-cbrt.ll
+++ b/llvm/test/Transforms/InstCombine/pow-cbrt.ll
@@ -3,8 +3,8 @@

 define double @pow_intrinsic_third_fast(double %x) {
 ; CHECK-LABEL: @pow_intrinsic_third_fast(
-; CHECK-NEXT:    [[POW:%.*]] = call fast double @llvm.pow.f64(double [[X:%.*]], double 0x3FD5555555555555)
-; CHECK-NEXT:    ret double [[POW]]
+; CHECK-NEXT:    [[CBRT:%.*]] = call fast double @cbrt(double [[X:%.*]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:    ret double [[CBRT]]
 ;
   %pow = call fast double @llvm.pow.f64(double %x, double 0x3fd5555555555555)
   ret double %pow
@@ -12,8 +12,8 @@

 define float @powf_intrinsic_third_fast(float %x) {
 ; CHECK-LABEL: @powf_intrinsic_third_fast(
-; CHECK-NEXT:    [[POW:%.*]] = call fast float @llvm.pow.f32(float [[X:%.*]], float 0x3FD5555560000000)
-; CHECK-NEXT:    ret float [[POW]]
+; CHECK-NEXT:    [[CBRTF:%.*]] = call fast float @cbrtf(float [[X:%.*]]) #[[ATTR1]]
+; CHECK-NEXT:    ret float [[CBRTF]]
 ;
   %pow = call fast float @llvm.pow.f32(float %x, float 0x3fd5555560000000)
   ret float %pow
@@ -21,8 +21,8 @@

 define double @pow_intrinsic_third_approx(double %x) {
 ; CHECK-LABEL: @pow_intrinsic_third_approx(
-; CHECK-NEXT:    [[POW:%.*]] = call afn double @llvm.pow.f64(double [[X:%.*]], double 0x3FD5555555555555)
-; CHECK-NEXT:    ret double [[POW]]
+; CHECK-NEXT:    [[CBRT:%.*]] = call afn double @cbrt(double [[X:%.*]]) #[[ATTR1]]
+; CHECK-NEXT:    ret double [[CBRT]]
 ;
   %pow = call afn double @llvm.pow.f64(double %x, double 0x3fd5555555555555)
   ret double %pow
@@ -30,8 +30,8 @@

 define float @powf_intrinsic_third_approx(float %x) {
 ; CHECK-LABEL: @powf_intrinsic_third_approx(
-; CHECK-NEXT:    [[POW:%.*]] = call afn float @llvm.pow.f32(float [[X:%.*]], float 0x3FD5555560000000)
-; CHECK-NEXT:    ret float [[POW]]
+; CHECK-NEXT:    [[CBRTF:%.*]] = call afn float @cbrtf(float [[X:%.*]]) #[[ATTR1]]
+; CHECK-NEXT:    ret float [[CBRTF]]
 ;
   %pow = call afn float @llvm.pow.f32(float %x, float 0x3fd5555560000000)
   ret float %pow
@@ -39,8 +39,8 @@

 define double @pow_libcall_third_fast(double %x) {
 ; CHECK-LABEL: @pow_libcall_third_fast(
-; CHECK-NEXT:    [[POW:%.*]] = call fast double @pow(double [[X:%.*]], double 0x3FD5555555555555)
-; CHECK-NEXT:    ret double [[POW]]
+; CHECK-NEXT:    [[CBRT:%.*]] = call fast double @cbrt(double [[X:%.*]])
+; CHECK-NEXT:    ret double [[CBRT]]
 ;
   %pow = call fast double @pow(double %x, double 0x3fd5555555555555)
   ret double %pow
@@ -48,8 +48,8 @@

 define float @powf_libcall_third_fast(float %x) {
 ; CHECK-LABEL: @powf_libcall_third_fast(
-; CHECK-NEXT:    [[POW:%.*]] = call fast float @powf(float [[X:%.*]], float 0x3FD5555560000000)
-; CHECK-NEXT:    ret float [[POW]]
+; CHECK-NEXT:    [[CBRTF:%.*]] = call fast float @cbrtf(float [[X:%.*]])
+; CHECK-NEXT:    ret float [[CBRTF]]
 ;
   %pow = call fast float @powf(float %x, float 0x3fd5555560000000)
   ret float %pow
@@ -57,8 +57,9 @@

 define double @pow_intrinsic_negthird_fast(double %x) {
 ; CHECK-LABEL: @pow_intrinsic_negthird_fast(
-; CHECK-NEXT:    [[POW:%.*]] = call fast double @llvm.pow.f64(double [[X:%.*]], double 0xBFD5555555555555)
-; CHECK-NEXT:    ret double [[POW]]
+; CHECK-NEXT:    [[CBRT:%.*]] = call fast double @cbrt(double [[X:%.*]]) #[[ATTR1]]
+; CHECK-NEXT:    [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[CBRT]]
+; CHECK-NEXT:    ret double [[RECIPROCAL]]
 ;
   %pow = call fast double @llvm.pow.f64(double %x, double 0xbfd5555555555555)
   ret double %pow
@@ -66,8 +67,9 @@

 define float @powf_intrinsic_negthird_fast(float %x) {
 ; CHECK-LABEL: @powf_intrinsic_negthird_fast(
-; CHECK-NEXT:    [[POW:%.*]] = call fast float @llvm.pow.f32(float [[X:%.*]], float 0xBFD5555560000000)
-; CHECK-NEXT:    ret float [[POW]]
+; CHECK-NEXT:    [[CBRTF:%.*]] = call fast float @cbrtf(float [[X:%.*]]) #[[ATTR1]]
+; CHECK-NEXT:    [[RECIPROCAL:%.*]] = fdiv fast float 1.000000e+00, [[CBRTF]]
+; CHECK-NEXT:    ret float [[RECIPROCAL]]
 ;
   %pow = call fast float @llvm.pow.f32(float %x, float 0xbfd5555560000000)
   ret float %pow
@@ -75,8 +77,9 @@

 define double @pow_intrinsic_negthird_approx(double %x) {
 ; CHECK-LABEL: @pow_intrinsic_negthird_approx(
-; CHECK-NEXT:    [[POW:%.*]] = call afn double @llvm.pow.f64(double [[X:%.*]], double 0xBFD5555555555555)
-; CHECK-NEXT:    ret double [[POW]]
+; CHECK-NEXT:    [[CBRT:%.*]] = call afn double @cbrt(double [[X:%.*]]) #[[ATTR1]]
+; CHECK-NEXT:    [[RECIPROCAL:%.*]] = fdiv afn double 1.000000e+00, [[CBRT]]
+; CHECK-NEXT:    ret double [[RECIPROCAL]]
 ;
   %pow = call afn double @llvm.pow.f64(double %x, double 0xbfd5555555555555)
   ret double %pow
@@ -84,8 +87,9 @@

 define float @powf_intrinsic_negthird_approx(float %x) {
 ; CHECK-LABEL: @powf_intrinsic_negthird_approx(
-; CHECK-NEXT:    [[POW:%.*]] = call afn float @llvm.pow.f32(float [[X:%.*]], float 0xBFD5555560000000)
-; CHECK-NEXT:    ret float [[POW]]
+; CHECK-NEXT:    [[CBRTF:%.*]] = call afn float @cbrtf(float [[X:%.*]]) #[[ATTR1]]
+; CHECK-NEXT:    [[RECIPROCAL:%.*]] = fdiv afn float 1.000000e+00, [[CBRTF]]
+; CHECK-NEXT:    ret float [[RECIPROCAL]]
 ;
   %pow = call afn float @llvm.pow.f32(float %x, float 0xbfd5555560000000)
   ret float %pow
@@ -93,8 +97,9 @@

 define double @pow_libcall_negthird_fast(double %x) {
 ; CHECK-LABEL: @pow_libcall_negthird_fast(
-; CHECK-NEXT:    [[POW:%.*]] = call fast double @pow(double [[X:%.*]], double 0xBFD5555555555555)
-; CHECK-NEXT:    ret double [[POW]]
+; CHECK-NEXT:    [[CBRT:%.*]] = call fast double @cbrt(double [[X:%.*]])
+; CHECK-NEXT:    [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[CBRT]]
+; CHECK-NEXT:    ret double [[RECIPROCAL]]
 ;
   %pow = call fast double @pow(double %x, double 0xbfd5555555555555)
   ret double %pow
@@ -102,8 +107,9 @@

 define float @powf_libcall_negthird_fast(float %x) {
 ; CHECK-LABEL: @powf_libcall_negthird_fast(
-; CHECK-NEXT:    [[POW:%.*]] = call fast float @powf(float [[X:%.*]], float 0xBFD5555560000000)
-; CHECK-NEXT:    ret float [[POW]]
+; CHECK-NEXT:    [[CBRTF:%.*]] = call fast float @cbrtf(float [[X:%.*]])
+; CHECK-NEXT:    [[RECIPROCAL:%.*]] = fdiv fast float 1.000000e+00, [[CBRTF]]
+; CHECK-NEXT:    ret float [[RECIPROCAL]]
 ;
   %pow = call fast float @powf(float %x, float 0xbfd5555560000000)
   ret float %pow