Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -12330,12 +12330,24 @@
                                  In, DAG.getUNDEF(SVT)));
 }
 
-// The only differences between FABS and FNEG are the mask and the logic op.
+/// The only differences between FABS and FNEG are the mask and the logic op.
+/// FNEG also has a folding opportunity for FNEG(FABS(x)).
 static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
   assert((Op.getOpcode() == ISD::FABS || Op.getOpcode() == ISD::FNEG) &&
          "Wrong opcode for lowering FABS or FNEG.");
 
   bool IsFABS = (Op.getOpcode() == ISD::FABS);
+
+  // If this is a FABS and it has an FNEG user, bail out to fold the combination
+  // into an FNABS. We'll lower the FABS after that if it is still in use.
+  if (IsFABS)
+    for (SDNode *User : Op->uses())
+      if (User->getOpcode() == ISD::FNEG)
+        return Op;
+
+  SDValue Op0 = Op.getOperand(0);
+  bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
+
   SDLoc dl(Op);
   MVT VT = Op.getSimpleValueType();
   // Assume scalar op for initialization; update for vector if needed.
@@ -12371,15 +12383,19 @@
     // For a vector, cast operands to a vector type, perform the logic op,
     // and cast the result back to the original value type.
     MVT VecVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
-    SDValue Op0Casted = DAG.getNode(ISD::BITCAST, dl, VecVT, Op.getOperand(0));
     SDValue MaskCasted = DAG.getNode(ISD::BITCAST, dl, VecVT, Mask);
-    unsigned LogicOp = IsFABS ? ISD::AND : ISD::XOR;
+    SDValue Operand = IsFNABS ?
+      DAG.getNode(ISD::BITCAST, dl, VecVT, Op0.getOperand(0)) :
+      DAG.getNode(ISD::BITCAST, dl, VecVT, Op0);
+    unsigned BitOp = IsFABS ? ISD::AND : IsFNABS ? ISD::OR : ISD::XOR;
     return DAG.getNode(ISD::BITCAST, dl, VT,
-                       DAG.getNode(LogicOp, dl, VecVT, Op0Casted, MaskCasted));
+                       DAG.getNode(BitOp, dl, VecVT, Operand, MaskCasted));
   }
+
   // If not vector, then scalar.
-  unsigned LogicOp = IsFABS ? X86ISD::FAND : X86ISD::FXOR;
-  return DAG.getNode(LogicOp, dl, VT, Op.getOperand(0), Mask);
+  unsigned BitOp = IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
+  SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
+  return DAG.getNode(BitOp, dl, VT, Operand, Mask);
 }
 
 static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
Index: test/CodeGen/X86/fnabs.ll
===================================================================
--- test/CodeGen/X86/fnabs.ll
+++ test/CodeGen/X86/fnabs.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx| FileCheck %s
+
+; Verify that we generate a single OR instruction for a scalar, vec128, and vec256
+; FNABS(x) operation -> FNEG (FABS(x)).
+; If the FABS() result isn't used, the AND instruction should be eliminated.
+; PR20578: http://llvm.org/bugs/show_bug.cgi?id=20578
+
+define float @scalar_no_abs(float %a) {
+; CHECK-LABEL: scalar_no_abs:
+; CHECK: vorps
+; CHECK-NEXT: retq
+  %fabs = tail call float @fabsf(float %a) #1
+  %fsub = fsub float -0.0, %fabs
+  ret float %fsub
+}
+
+define float @scalar_uses_abs(float %a) {
+; CHECK-LABEL: scalar_uses_abs:
+; CHECK-DAG: vandps
+; CHECK-DAG: vorps
+; CHECK: vmulss
+; CHECK-NEXT: retq
+  %fabs = tail call float @fabsf(float %a) #1
+  %fsub = fsub float -0.0, %fabs
+  %fmul = fmul float %fsub, %fabs
+  ret float %fmul
+}
+
+define <4 x float> @vector128_no_abs(<4 x float> %a) {
+; CHECK-LABEL: vector128_no_abs:
+; CHECK: vorps
+; CHECK-NEXT: retq
+  %fabs = tail call <4 x float> @llvm.fabs.v4f32(< 4 x float> %a) #1
+  %fsub = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %fabs
+  ret <4 x float> %fsub
+}
+
+define <4 x float> @vector128_uses_abs(<4 x float> %a) {
+; CHECK-LABEL: vector128_uses_abs:
+; CHECK-DAG: vandps
+; CHECK-DAG: vorps
+; CHECK: vmulps
+; CHECK-NEXT: retq
+  %fabs = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) #1
+  %fsub = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %fabs
+  %fmul = fmul <4 x float> %fsub, %fabs
+  ret <4 x float> %fmul
+}
+
+define <8 x float> @vector256_no_abs(<8 x float> %a) {
+; CHECK-LABEL: vector256_no_abs:
+; CHECK: vorps
+; CHECK-NEXT: retq
+  %fabs = tail call <8 x float> @llvm.fabs.v8f32(< 8 x float> %a) #1
+  %fsub = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %fabs
+  ret <8 x float> %fsub
+}
+
+define <8 x float> @vector256_uses_abs(<8 x float> %a) {
+; CHECK-LABEL: vector256_uses_abs:
+; CHECK-DAG: vandps
+; CHECK-DAG: vorps
+; CHECK: vmulps
+; CHECK-NEXT: retq
+  %fabs = tail call <8 x float> @llvm.fabs.v8f32(<8 x float> %a) #1
+  %fsub = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %fabs
+  %fmul = fmul <8 x float> %fsub, %fabs
+  ret <8 x float> %fmul
+}
+
+declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
+declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
+
+declare float @fabsf(float)
+
+attributes #1 = { readnone }
+