Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -678,6 +678,27 @@
   return nullptr;
 }
 
+// \brief Returns the SDNode if it is a constant splat BuildVector or constant
+// float.
+static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
+  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
+    return CN;
+
+  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+    BitVector UndefElements;
+    ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
+
+    // BuildVectors can truncate their operands. Ignore that case here.
+    // FIXME: We blindly ignore splats which include undef which is overly
+    // pessimistic.
+    if (CN && UndefElements.none() &&
+        CN->getValueType(0) == N.getValueType().getScalarType())
+      return CN;
+  }
+
+  return nullptr;
+}
+
 SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
                                     SDValue N0, SDValue N1) {
   EVT VT = N0.getValueType();
@@ -6814,8 +6835,8 @@
 SDValue DAGCombiner::visitFMUL(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
-  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
   EVT VT = N->getValueType(0);
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
@@ -6835,13 +6856,31 @@
   if (DAG.getTarget().Options.UnsafeFPMath &&
       N1CFP && N1CFP->getValueAPF().isZero())
     return N1;
-  // fold (fmul A, 0) -> 0, vector edition.
-  if (DAG.getTarget().Options.UnsafeFPMath &&
-      ISD::isBuildVectorAllZeros(N1.getNode()))
-    return N1;
   // fold (fmul A, 1.0) -> A
   if (N1CFP && N1CFP->isExactlyValue(1.0))
     return N0;
+
+  if (DAG.getTarget().Options.UnsafeFPMath) {
+    // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+    if (N1CFP && N0.getOpcode() == ISD::FMUL &&
+        N0.getNode()->hasOneUse() && isConstOrConstSplatFP(N0.getOperand(1))) {
+      SDLoc SL(N);
+      SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(1), N1);
+      return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts);
+    }
+
+    // If allowed, fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
+    // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
+    // during an early run of DAGCombiner it can prevent folding with fmuls
+    // inserted during lowering.
+    if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) {
+      SDLoc SL(N);
+      const SDValue Two = DAG.getConstantFP(2.0, VT);
+      SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1);
+      return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts);
+    }
+  }
+
   // fold (fmul X, 2.0) -> (fadd X, X)
   if (N1CFP && N1CFP->isExactlyValue(+2.0))
     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0);
@@ -6864,14 +6903,6 @@
     }
   }
 
-  // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
-  if (DAG.getTarget().Options.UnsafeFPMath &&
-      N1CFP && N0.getOpcode() == ISD::FMUL &&
-      N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
-    return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
-                       DAG.getNode(ISD::FMUL, SDLoc(N), VT,
-                                   N0.getOperand(1), N1));
-
   return SDValue();
 }
 
Index: test/CodeGen/Mips/msa/arithmetic_float.ll
===================================================================
--- test/CodeGen/Mips/msa/arithmetic_float.ll
+++ test/CodeGen/Mips/msa/arithmetic_float.ll
@@ -276,8 +276,8 @@
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
   %3 = fmul <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, %2
-  ; CHECK-DAG: lui [[R3:\$[0-9]+]], 16384
-  ; CHECK-DAG: fill.w [[R4:\$w[0-9]+]], [[R3]]
+  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: ffint_u.w [[R4:\$w[0-9]+]], [[R3]]
   ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
   store <4 x float> %3, <4 x float>* %c
   ; CHECK-DAG: st.w [[R5]], 0($4)
@@ -287,16 +287,14 @@
 }
 
 define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind {
-  ; CHECK: .8byte 4611686018427387904
-  ; CHECK-NEXT: .8byte 4611686018427387904
   ; CHECK: fexp2_v2f64_2:
 
   %1 = load <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
   %3 = fmul <2 x double> <double 2.0, double 2.0>, %2
-  ; CHECK-DAG: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
-  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[G_PTR]])
+  ; CHECK-DAG: ldi.d [[R2:\$w[0-9]+]], 1
+  ; CHECK-DAG: ffint_u.d [[R3:\$w[0-9]+]], [[R2]]
   ; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]]
   store <2 x double> %3, <2 x double>* %c
   ; CHECK-DAG: st.d [[R4]], 0($4)
Index: test/CodeGen/R600/fmul.ll
===================================================================
--- test/CodeGen/R600/fmul.ll
+++ test/CodeGen/R600/fmul.ll
@@ -48,3 +48,28 @@
   store <4 x float> %result, <4 x float> addrspace(1)* %out
   ret void
 }
+
+; FUNC-LABEL: @test_mul_2_k
+; SI: V_MUL_F32
+; SI-NOT: V_MUL_F32
+; SI: S_ENDPGM
+define void @test_mul_2_k(float addrspace(1)* %out, float %x) #0 {
+  %y = fmul float %x, 2.0
+  %z = fmul float %y, 3.0
+  store float %z, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @test_mul_2_k_inv
+; SI: V_MUL_F32
+; SI-NOT: V_MUL_F32
+; SI-NOT: V_MAD_F32
+; SI: S_ENDPGM
+define void @test_mul_2_k_inv(float addrspace(1)* %out, float %x) #0 {
+  %y = fmul float %x, 3.0
+  %z = fmul float %y, 2.0
+  store float %z, float addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
Index: test/CodeGen/R600/llvm.sin.ll
===================================================================
--- test/CodeGen/R600/llvm.sin.ll
+++ test/CodeGen/R600/llvm.sin.ll
@@ -1,53 +1,84 @@
-;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-;RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
-;RUN: llc -march=r600 -mcpu=SI -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s
-
-;FUNC-LABEL: test
-;EG: MULADD_IEEE *
-;EG: FRACT *
-;EG: ADD *
-;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG-NOT: SIN
-;SI: V_MUL_F32
-;SI: V_FRACT_F32
-;SI: V_SIN_F32
-;SI-NOT: V_SIN_F32
-
-define void @test(float addrspace(1)* %out, float %x) #1 {
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s
+
+; FUNC-LABEL: sin_f32
+; EG: MULADD_IEEE *
+; EG: FRACT *
+; EG: ADD *
+; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; EG-NOT: SIN
+; SI: V_MUL_F32
+; SI: V_FRACT_F32
+; SI: V_SIN_F32
+; SI-NOT: V_SIN_F32
+
+define void @sin_f32(float addrspace(1)* %out, float %x) #1 {
   %sin = call float @llvm.sin.f32(float %x)
   store float %sin, float addrspace(1)* %out
   ret void
 }
 
-;FUNC-LABEL: testf
-;SI-UNSAFE: 4.774
-;SI-UNSAFE: V_MUL_F32
-;SI-SAFE: V_MUL_F32
-;SI-SAFE: V_MUL_F32
-;SI: V_FRACT_F32
-;SI: V_SIN_F32
-;SI-NOT: V_SIN_F32
-
-define void @testf(float addrspace(1)* %out, float %x) #1 {
-  %y = fmul float 3.0, %x
+; FUNC-LABEL: @sin_3x_f32
+; SI-UNSAFE-NOT: V_ADD_F32
+; SI-UNSAFE: 4.774648e-01
+; SI-UNSAFE: V_MUL_F32
+; SI-SAFE: V_MUL_F32
+; SI-SAFE: V_MUL_F32
+; SI: V_FRACT_F32
+; SI: V_SIN_F32
+; SI-NOT: V_SIN_F32
+define void @sin_3x_f32(float addrspace(1)* %out, float %x) #1 {
+  %y = fmul float 3.0, %x
+  %sin = call float @llvm.sin.f32(float %y)
+  store float %sin, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @sin_2x_f32
+; SI-UNSAFE-NOT: V_ADD_F32
+; SI-UNSAFE: 3.183099e-01
+; SI-UNSAFE: V_MUL_F32
+; SI-SAFE: V_ADD_F32
+; SI-SAFE: V_MUL_F32
+; SI: V_FRACT_F32
+; SI: V_SIN_F32
+; SI-NOT: V_SIN_F32
+define void @sin_2x_f32(float addrspace(1)* %out, float %x) #1 {
+  %y = fmul float 2.0, %x
+  %sin = call float @llvm.sin.f32(float %y)
+  store float %sin, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @test_2sin_f32
+; SI-UNSAFE: 3.183099e-01
+; SI-UNSAFE: V_MUL_F32
+; SI-SAFE: V_ADD_F32
+; SI-SAFE: V_MUL_F32
+; SI: V_FRACT_F32
+; SI: V_SIN_F32
+; SI-NOT: V_SIN_F32
+define void @test_2sin_f32(float addrspace(1)* %out, float %x) #1 {
+  %y = fmul float 2.0, %x
   %sin = call float @llvm.sin.f32(float %y)
   store float %sin, float addrspace(1)* %out
   ret void
 }
 
-;FUNC-LABEL: testv
-;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG-NOT: SIN
-;SI: V_SIN_F32
-;SI: V_SIN_F32
-;SI: V_SIN_F32
-;SI: V_SIN_F32
-;SI-NOT: V_SIN_F32
-
-define void @testv(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 {
+; FUNC-LABEL: @sin_v4f32
+; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; EG-NOT: SIN
+; SI: V_SIN_F32
+; SI: V_SIN_F32
+; SI: V_SIN_F32
+; SI: V_SIN_F32
+; SI-NOT: V_SIN_F32
+
+define void @sin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 {
   %sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx)
   store <4 x float> %sin, <4 x float> addrspace(1)* %out
   ret void
Index: test/CodeGen/X86/fmul-combines.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/fmul-combines.ll
@@ -0,0 +1,99 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+; CHECK-LABEL: fmul2_f32:
+; CHECK: addss %xmm0, %xmm0
+define float @fmul2_f32(float %x) {
+  %y = fmul float %x, 2.0
+  ret float %y
+}
+
+; fmul 2.0, x -> fadd x, x for vectors.
+
+; CHECK-LABEL: fmul2_v4f32:
+; CHECK: addps %xmm0, %xmm0
+; CHECK-NEXT: retq
+define <4 x float> @fmul2_v4f32(<4 x float> %x) {
+  %y = fmul <4 x float> %x, <float 2.0, float 2.0, float 2.0, float 2.0>
+  ret <4 x float> %y
+}
+
+; CHECK-LABEL: constant_fold_fmul_v4f32:
+; CHECK: movaps
+; CHECK-NEXT: ret
+define <4 x float> @constant_fold_fmul_v4f32(<4 x float> %x) {
+  %y = fmul <4 x float> <float 4.0, float 4.0, float 4.0, float 4.0>, <float 2.0, float 2.0, float 2.0, float 2.0>
+  ret <4 x float> %y
+}
+
+; CHECK-LABEL: fmul0_v4f32:
+; CHECK: xorps %xmm0, %xmm0
+; CHECK-NEXT: retq
+define <4 x float> @fmul0_v4f32(<4 x float> %x) #0 {
+  %y = fmul <4 x float> %x, <float 0.0, float 0.0, float 0.0, float 0.0>
+  ret <4 x float> %y
+}
+
+; CHECK-LABEL: fmul_c2_c4_v4f32:
+; CHECK-NOT: addps
+; CHECK: mulps
+; CHECK-NOT: mulps
+; CHECK-NEXT: ret
+define <4 x float> @fmul_c2_c4_v4f32(<4 x float> %x) #0 {
+  %y = fmul <4 x float> %x, <float 2.0, float 2.0, float 2.0, float 2.0>
+  %z = fmul <4 x float> %y, <float 4.0, float 4.0, float 4.0, float 4.0>
+  ret <4 x float> %z
+}
+
+; CHECK-LABEL: fmul_c3_c4_v4f32:
+; CHECK-NOT: addps
+; CHECK: mulps
+; CHECK-NOT: mulps
+; CHECK-NEXT: ret
+define <4 x float> @fmul_c3_c4_v4f32(<4 x float> %x) #0 {
+  %y = fmul <4 x float> %x, <float 3.0, float 3.0, float 3.0, float 3.0>
+  %z = fmul <4 x float> %y, <float 4.0, float 4.0, float 4.0, float 4.0>
+  ret <4 x float> %z
+}
+
+; CHECK-LABEL: fmul_c2_c4_f32:
+; CHECK-NOT: addss
+; CHECK: mulss
+; CHECK-NOT: mulss
+; CHECK-NEXT: ret
+define float @fmul_c2_c4_f32(float %x) #0 {
+  %y = fmul float %x, 2.0
+  %z = fmul float %y, 4.0
+  ret float %z
+}
+
+; CHECK-LABEL: fmul_c3_c4_f32:
+; CHECK-NOT: addss
+; CHECK: mulss
+; CHECK-NOT: mulss
+; CHECK-NEXT: ret
+define float @fmul_c3_c4_f32(float %x) #0 {
+  %y = fmul float %x, 3.0
+  %z = fmul float %y, 4.0
+  ret float %z
+}
+
+; CHECK-LABEL: fmul_fneg_fneg_f32:
+; CHECK: mulss %xmm1, %xmm0
+; CHECK-NEXT: retq
+define float @fmul_fneg_fneg_f32(float %x, float %y) {
+  %x.neg = fsub float -0.0, %x
+  %y.neg = fsub float -0.0, %y
+  %mul = fmul float %x.neg, %y.neg
+  ret float %mul
+}
+; CHECK-LABEL: fmul_fneg_fneg_v4f32:
+; CHECK: mulps %xmm1, %xmm0
+; CHECK-NEXT: retq
+define <4 x float> @fmul_fneg_fneg_v4f32(<4 x float> %x, <4 x float> %y) {
+  %x.neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %x
+  %y.neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %y
+  %mul = fmul <4 x float> %x.neg, %y.neg
+  ret <4 x float> %mul
+}
+
+attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }