Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11912,18 +11912,24 @@ return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags); } - // Try to convert x ** (1/4) into square roots. + // Try to convert x ** (1/4) and x ** (3/4) into square roots. // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case. // TODO: This could be extended (using a target hook) to handle smaller // power-of-2 fractional exponents. - if (ExponentC->getValueAPF().isExactlyValue(0.25)) { + bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25); + bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75); + if (ExponentIs025 || ExponentIs075) { // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0. // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN. + // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0. + // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN. // For regular numbers, rounding may cause the results to differ. // Therefore, we require { nsz ninf afn } for this transform. // TODO: We could select out the special cases if we don't have nsz/ninf. SDNodeFlags Flags = N->getFlags(); - if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || + + // We only need no signed zeros for the 0.25 case. 
+ if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() || !Flags.hasApproximateFuncs()) return SDValue(); @@ -11939,7 +11945,11 @@ // pow(X, 0.25) --> sqrt(sqrt(X)) SDLoc DL(N); SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags); - return DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags); + SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags); + if (ExponentIs025) + return SqrtSqrt; + // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X)) + return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags); } return SDValue(); Index: llvm/trunk/test/CodeGen/AArch64/pow.75.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/pow.75.ll +++ llvm/trunk/test/CodeGen/AArch64/pow.75.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-- -debug 2>&1 | FileCheck %s +; REQUIRES: asserts + +declare float @llvm.pow.f32(float, float) +declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) +declare double @llvm.pow.f64(double, double) +declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) + +define float @pow_f32_three_fourth_fmf(float %x) nounwind { +; CHECK: Combining: {{.*}}: f32 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01> +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... 
into: [[R]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] + %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 7.5e-01) + ret float %r +} + +define double @pow_f64_three_fourth_fmf(double %x) nounwind { +; CHECK: Combining: {{.*}}: f64 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f64<7.500000e-01> +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... into: [[R]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] + %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 7.5e-01) + ret double %r +} + +define <4 x float> @pow_v4f32_three_fourth_fmf(<4 x float> %x) nounwind { +; CHECK: Combining: {{.*}}: v4f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], {{.*}} +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: v4f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: v4f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: v4f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... 
into: [[R]]: v4f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] + %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 7.5e-01, float 7.5e-01, float 7.5e-01, float 7.5e-01>) + ret <4 x float> %r +} + +define <2 x double> @pow_v2f64_three_fourth_fmf(<2 x double> %x) nounwind { +; CHECK: Combining: {{.*}}: v2f64 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], {{.*}} +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: v2f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: v2f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: v2f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... into: [[R]]: v2f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] + %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 7.5e-01, double 7.5e-01>) + ret <2 x double> %r +} Index: llvm/trunk/test/CodeGen/ARM/pow.75.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/pow.75.ll +++ llvm/trunk/test/CodeGen/ARM/pow.75.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=thumbv8-linux-gnueabihf -mattr=neon -debug 2>&1 | FileCheck %s +; REQUIRES: asserts + +declare float @llvm.pow.f32(float, float) +declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) +declare double @llvm.pow.f64(double, double) +declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) + +define float @pow_f32_three_fourth_fmf(float %x) nounwind { +; CHECK: Combining: {{.*}}: f32 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01> +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] +; 
CHECK-NEXT: ... into: [[R]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] + %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 7.5e-01) + ret float %r +} + +define double @pow_f64_three_fourth_fmf(double %x) nounwind { +; CHECK: Combining: {{.*}}: f64 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f64<7.500000e-01> +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... into: [[R]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] + %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 7.5e-01) + ret double %r +} + +define <4 x float> @pow_v4f32_three_fourth_fmf(<4 x float> %x) nounwind { +; CHECK: Combining: {{.*}}: v4f32 = BUILD_VECTOR [[FOURTH:t[0-9]+]], [[THIRD:t[0-9]+]], [[SECOND:t[0-9]+]], [[FIRST:t[0-9]+]] +; CHECK: Combining: [[FIRST]]: f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01> +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... 
into: [[R]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] +; CHECK: Combining: [[SECOND]]: f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01> +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... into: [[R]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] +; CHECK: Combining: [[THIRD]]: f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01> +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... into: [[R]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] +; CHECK: Combining: [[FOURTH]]: f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01> +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... 
into: [[R]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] + %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 7.5e-01, float 7.5e-01, float 7.5e-01, float 7.5e-01>) + ret <4 x float> %r +} + +define <2 x double> @pow_v2f64_three_fourth_fmf(<2 x double> %x) nounwind { +; CHECK: Combining: {{.*}}: v2f64 = BUILD_VECTOR [[SECOND:t[0-9]+]], [[FIRST:t[0-9]+]] +; CHECK: Combining: [[FIRST]]: f64 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f64<7.500000e-01> +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... into: [[R]]: f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] +; CHECK: Combining: [[SECOND]]: f64 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f64<7.500000e-01> +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... 
into: [[R]]: f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] + %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 7.5e-01, double 7.5e-01>) + ret <2 x double> %r +} Index: llvm/trunk/test/CodeGen/PowerPC/pow.75.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/pow.75.ll +++ llvm/trunk/test/CodeGen/PowerPC/pow.75.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -debug 2>&1 | FileCheck %s +; REQUIRES: asserts + +declare float @llvm.pow.f32(float, float) +declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) +declare double @llvm.pow.f64(double, double) +declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) + +define float @pow_f32_three_fourth_fmf(float %x) nounwind { +; CHECK: Combining: {{.*}}: f32 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01> +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... into: [[R]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] + %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 7.5e-01) + ret float %r +} + +define double @pow_f64_three_fourth_fmf(double %x) nounwind { +; CHECK: Combining: {{.*}}: f64 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f64<7.500000e-01> +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... 
into: [[R]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] + %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 7.5e-01) + ret double %r +} + +define <4 x float> @pow_v4f32_three_fourth_fmf(<4 x float> %x) nounwind { +; CHECK: Combining: {{.*}}: v4f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], {{.*}} +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: v4f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: v4f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: v4f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... into: [[R]]: v4f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] + %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 7.5e-01, float 7.5e-01, float 7.5e-01, float 7.5e-01>) + ret <4 x float> %r +} + +define <2 x double> @pow_v2f64_three_fourth_fmf(<2 x double> %x) nounwind { +; CHECK: Combining: {{.*}}: v2f64 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], {{.*}} +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: v2f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: v2f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: v2f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... 
into: [[R]]: v2f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] + %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 7.5e-01, double 7.5e-01>) + ret <2 x double> %r +} Index: llvm/trunk/test/CodeGen/X86/pow.75.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pow.75.ll +++ llvm/trunk/test/CodeGen/X86/pow.75.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- -debug 2>&1 | FileCheck %s +; REQUIRES: asserts + +declare float @llvm.pow.f32(float, float) +declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) +declare double @llvm.pow.f64(double, double) +declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) + +define float @pow_f32_three_fourth_fmf(float %x) nounwind { +; CHECK: Combining: {{.*}}: f32 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01> +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... into: [[R]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] + %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 7.5e-01) + ret float %r +} + +define double @pow_f64_three_fourth_fmf(double %x) nounwind { +; CHECK: Combining: {{.*}}: f64 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f64<7.500000e-01> +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... 
into: [[R]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]] + %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 7.5e-01) + ret double %r +} + +define <4 x float> @pow_v4f32_three_fourth_fmf(<4 x float> %x) nounwind { +; CHECK: Combining: {{.*}}: v4f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], {{.*}} +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: v4f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: v4f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: v4f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... into: [[R]]: v4f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] + %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 7.5e-01, float 7.5e-01, float 7.5e-01, float 7.5e-01>) + ret <4 x float> %r +} + +define <2 x double> @pow_v2f64_three_fourth_fmf(<2 x double> %x) nounwind { +; CHECK: Combining: {{.*}}: v2f64 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], {{.*}} +; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: v2f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]] +; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: v2f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]] +; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: v2f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] +; CHECK-NEXT: ... into: [[R]]: v2f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]] + %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 7.5e-01, double 7.5e-01>) + ret <2 x double> %r +}