Index: llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h +++ llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h @@ -550,11 +550,8 @@ /// is often a storage-only type but has native conversions. FP16_TO_FP, FP_TO_FP16, - /// FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, - /// FLOG, FLOG2, FLOG10, FEXP, FEXP2, - /// FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary - /// floating point operations. These are inspired by libm. - FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, + /// Perform libm-inspired FP operations; all unary except FPOWI and FPOW. + FNEG, FABS, FSQRT, FCBRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR, /// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two Index: llvm/trunk/include/llvm/IR/RuntimeLibcalls.def =================================================================== --- llvm/trunk/include/llvm/IR/RuntimeLibcalls.def +++ llvm/trunk/include/llvm/IR/RuntimeLibcalls.def @@ -128,6 +128,11 @@ HANDLE_LIBCALL(SQRT_F80, "sqrtl") HANDLE_LIBCALL(SQRT_F128, "sqrtl") HANDLE_LIBCALL(SQRT_PPCF128, "sqrtl") +HANDLE_LIBCALL(CBRT_F32, "cbrtf") +HANDLE_LIBCALL(CBRT_F64, "cbrt") +HANDLE_LIBCALL(CBRT_F80, "cbrtl") +HANDLE_LIBCALL(CBRT_F128, "cbrtl") +HANDLE_LIBCALL(CBRT_PPCF128, "cbrtl") HANDLE_LIBCALL(LOG_F32, "logf") HANDLE_LIBCALL(LOG_F64, "log") HANDLE_LIBCALL(LOG_F80, "logl") Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11571,6 +11571,34 @@ if (!ExponentC) return SDValue(); + // Try to convert x ** (1/3) into cube root. + // TODO: Handle the various flavors of long double. + // TODO: Since we're approximating, we don't need an exact 1/3 exponent. 
+ // Some range near 1/3 should be fine. + EVT VT = N->getValueType(0); + if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) || + (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) { + // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0. + // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf. + // pow(-val, 1/3) = nan; cbrt(-val) = -num. + // For regular numbers, rounding may cause the results to differ. + // Therefore, we require { nsz ninf nnan afn } for this transform. + // TODO: We could select out the special cases if we don't have nsz/ninf. + SDNodeFlags Flags = N->getFlags(); + if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() || + !Flags.hasApproximateFuncs()) + return SDValue(); + + // Do not create a cbrt() libcall if the target does not have it, and do not + // turn a pow that has lowering support into a cbrt() libcall. + if (!DAG.getLibInfo().has(LibFunc_cbrt) || + (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) && + DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT))) + return SDValue(); + + return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags); + } + // Try to convert x ** (1/4) into square roots. // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case. // TODO: This could be extended (using a target hook) to handle smaller @@ -11587,7 +11615,6 @@ return SDValue(); // Don't double the number of libcalls. We are trying to inline fast code. 
- EVT VT = N->getValueType(0); if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT)) return SDValue(); Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4047,6 +4047,11 @@ RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128)); break; + case ISD::FCBRT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64, + RTLIB::CBRT_F80, RTLIB::CBRT_F128, + RTLIB::CBRT_PPCF128)); + break; case ISD::FSIN: case ISD::STRICT_FSIN: Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -181,6 +181,7 @@ case ISD::FNEG: return "fneg"; case ISD::FSQRT: return "fsqrt"; case ISD::STRICT_FSQRT: return "strict_fsqrt"; + case ISD::FCBRT: return "fcbrt"; case ISD::FSIN: return "fsin"; case ISD::STRICT_FSIN: return "strict_fsin"; case ISD::FCOS: return "fcos"; Index: llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp +++ llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp @@ -666,6 +666,7 @@ // These library functions default to expand. 
for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) { + setOperationAction(ISD::FCBRT, VT, Expand); setOperationAction(ISD::FLOG , VT, Expand); setOperationAction(ISD::FLOG2, VT, Expand); setOperationAction(ISD::FLOG10, VT, Expand); Index: llvm/trunk/test/CodeGen/X86/pow.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pow.ll +++ llvm/trunk/test/CodeGen/X86/pow.ll @@ -7,6 +7,8 @@ declare double @llvm.pow.f64(double, double) declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) +declare x86_fp80 @llvm.pow.f80(x86_fp80, x86_fp80) + define float @pow_f32_one_fourth_fmf(float %x) nounwind { ; CHECK-LABEL: pow_f32_one_fourth_fmf: ; CHECK: # %bb.0: @@ -165,8 +167,7 @@ define float @pow_f32_one_third_fmf(float %x) nounwind { ; CHECK-LABEL: pow_f32_one_third_fmf: ; CHECK: # %bb.0: -; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: jmp powf # TAILCALL +; CHECK-NEXT: jmp cbrtf # TAILCALL %one = uitofp i32 1 to float %three = uitofp i32 3 to float %exp = fdiv float %one, %three @@ -177,8 +178,7 @@ define double @pow_f64_one_third_fmf(double %x) nounwind { ; CHECK-LABEL: pow_f64_one_third_fmf: ; CHECK: # %bb.0: -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: jmp pow # TAILCALL +; CHECK-NEXT: jmp cbrt # TAILCALL %one = uitofp i32 1 to double %three = uitofp i32 3 to double %exp = fdiv double %one, %three @@ -186,3 +186,45 @@ ret double %r } +; TODO: We could turn this into cbrtl, but currently we only handle float/double types. 
+ +define x86_fp80 @pow_f80_one_third_fmf(x86_fp80 %x) nounwind { +; CHECK-LABEL: pow_f80_one_third_fmf: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fldt {{.*}}(%rip) +; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fstpt (%rsp) +; CHECK-NEXT: callq powl +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: retq + %one = uitofp i32 1 to x86_fp80 + %three = uitofp i32 3 to x86_fp80 + %exp = fdiv x86_fp80 %one, %three + %r = call nsz nnan ninf afn x86_fp80 @llvm.pow.f80(x86_fp80 %x, x86_fp80 %exp) + ret x86_fp80 %r +} + +; We might want to allow this. The exact hex value for 1/3 as a double is 0x3fd5555555555555. + +define double @pow_f64_not_exactly_one_third_fmf(double %x) nounwind { +; CHECK-LABEL: pow_f64_not_exactly_one_third_fmf: +; CHECK: # %bb.0: +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: jmp pow # TAILCALL + %r = call nsz nnan ninf afn double @llvm.pow.f64(double %x, double 0x3fd5555555555556) + ret double %r +} + +; We require all 4 of nsz, ninf, nnan, afn. + +define double @pow_f64_not_enough_fmf(double %x) nounwind { +; CHECK-LABEL: pow_f64_not_enough_fmf: +; CHECK: # %bb.0: +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: jmp pow # TAILCALL + %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 0x3fd5555555555555) + ret double %r +} +