Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -460,6 +460,11 @@ return true; } + /// Return true if denormals will be flushed to zero. + virtual bool willCanonicalize(SelectionDAG &DAG, SDNode *N) const { + return false; + } + /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X). virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const { // Default behavior is to replace SQRT(X) with X*RSQRT(X). Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12708,18 +12708,26 @@ } // (fsub -0.0, N1) -> -N1 - // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the - // FSUB does not specify the sign bit of a NaN. Also note that for - // the same reason, the inverse transform is not safe, unless fast math - // flags are in play. if (N0CFP && N0CFP->isZero()) { if (N0CFP->isNegative() || (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { - if (SDValue NegN1 = - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize)) - return NegN1; - if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); + // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are + // flushed to zero, unless all users treat denorms as zero (DAZ). + DenormalMode DenormMode = DAG.getDenormalMode(VT); + + // Check that all uses will flush denorms to zero. 
+ bool Flushed = true; + for (auto UI = N->use_begin(), E = N->use_end(); UI != E; ++UI) + if (!TLI.willCanonicalize(DAG, *UI)) + Flushed = false; + + if (Flushed || (DenormMode == DenormalMode::getIEEE())) { + if (SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize)) + return NegN1; + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); + } } } Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -692,7 +692,7 @@ void visitAdd(const User &I) { visitBinary(I, ISD::ADD); } void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); } void visitSub(const User &I) { visitBinary(I, ISD::SUB); } - void visitFSub(const User &I); + void visitFSub(const User &I) { visitBinary(I, ISD::FSUB); } void visitMul(const User &I) { visitBinary(I, ISD::MUL); } void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); } void visitURem(const User &I) { visitBinary(I, ISD::UREM); } Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3003,20 +3003,6 @@ DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); } -void SelectionDAGBuilder::visitFSub(const User &I) { - // -0.0 - X --> fneg - Type *Ty = I.getType(); - if (isa<Constant>(I.getOperand(0)) && - I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { - SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(), - Op2.getValueType(), Op2)); - return; - } - - visitBinary(I, ISD::FSUB); -} - void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) { SDNodeFlags Flags; Index: 
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -176,6 +176,7 @@ NegatibleCost &Cost, unsigned Depth) const override; + bool willCanonicalize(SelectionDAG &DAG, SDNode *N) const override; bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -831,6 +831,52 @@ ForCodeSize, Cost, Depth); } +// Return true if the Opcode will treat denormals as zero (DAZ). +bool AMDGPUTargetLowering::willCanonicalize(SelectionDAG &DAG, SDNode *N) const { + // FIXME: This is not a complete list. This only represents current + // testing. + switch (N->getOpcode()) { + default: return false; + case ISD::FCANONICALIZE: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FMA: + case ISD::FMAD: + case ISD::FMAXNUM: + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FTRUNC: + case ISD::FSQRT: + case AMDGPUISD::CLAMP: + case AMDGPUISD::FMAD_FTZ: + case AMDGPUISD::FMED3: + case AMDGPUISD::RCP: + return true; + case ISD::FNEG: + case ISD::EXTRACT_VECTOR_ELT: + case ISD::EXTRACT_SUBVECTOR: { + for (auto UI = N->use_begin(), E = N->use_end(); UI != E; ++UI) + if (!willCanonicalize(DAG, *UI)) + return false; + return true; + } + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntrinsicID + = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + switch (IntrinsicID) { + case Intrinsic::amdgcn_fdiv_fast: + return true; + } + return false; + } + } + + llvm_unreachable("invalid operation"); +} + //===---------------------------------------------------------------------===// // Target Properties 
//===---------------------------------------------------------------------===// Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -9145,7 +9145,6 @@ switch (Opcode) { // These will flush denorms if required. case ISD::FADD: - case ISD::FSUB: case ISD::FMUL: case ISD::FCEIL: case ISD::FFLOOR: @@ -9175,7 +9174,25 @@ case AMDGPUISD::CVT_F32_UBYTE2: case AMDGPUISD::CVT_F32_UBYTE3: return true; + case ISD::FSUB: { + SDValue N0 = Op.getOperand(0); + ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true); + const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags Flags = Op->getFlags(); + + // FIXME: This works around a bug with FCANONICALIZE. Legalize + // will remove the FCANONICALIZE before the FSUB(-0,X)->FNEG(X) + // transform is considered. + // FSUB(+-0.0, X) will become FNEG(X) + if (N0CFP && N0CFP->isZero()) { + if (N0CFP->isNegative() || + (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { + return false; + } + } + return true; + } // It can/will be lowered or combined as a bit operation. // Need to check their input recursively to handle. 
case ISD::FNEG: Index: llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll +++ llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll @@ -232,8 +232,8 @@ ; GCN-LABEL: {{^}}div_v4_c_by_x_25ulp: ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}} -; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}} ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}} +; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}} ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}} ; GCN-DENORM-DAG: v_rcp_f32_e32 ; GCN-DENORM-DAG: v_rcp_f32_e32 @@ -274,10 +274,10 @@ } ; GCN-LABEL: {{^}}div_v4_c_by_minus_x_25ulp: -; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}} -; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}} -; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}} -; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}} +; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}} +; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}} +; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}} +; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}} ; GCN-DENORM-DAG: v_rcp_f32_e32 ; GCN-DENORM-DAG: v_rcp_f32_e32 @@ -298,8 +298,8 @@ ; GCN-DENORM-DAG: v_div_fmas_f32 ; GCN-DENORM-DAG: v_div_fmas_f32 -; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, -2.0{{$}} -; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, -2.0{{$}} +; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, 2.0{{$}} +; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, 2.0{{$}} ; GCN-FLUSH-DAG: v_rcp_f32_e32 ; GCN-FLUSH-DAG: v_rcp_f32_e64 Index: llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll +++ llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll @@ -192,6 +192,10 @@ ret void } +; FIXME: The MAD only folds the FSUB(-0,X) when the FNEG(X) transform +; happens in SelectionDAGBuilder. DAGCombiner probably needs to +; be updated to fold the FNEG after visitFSUB(...) runs. 
+ ; GCN-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f16 ; GCN: {{buffer|flat|global}}_load_ushort [[R1:v[0-9]+]], ; GCN: {{buffer|flat|global}}_load_ushort [[R2:v[0-9]+]], @@ -216,7 +220,7 @@ %r1 = load volatile half, half addrspace(1)* %gep.0 %r2 = load volatile half, half addrspace(1)* %gep.1 - %r1.fneg = fsub half -0.000000e+00, %r1 + %r1.fneg = fneg half %r1 %r3 = tail call half @llvm.fmuladd.f16(half -2.0, half %r1.fneg, half %r2) store half %r3, half addrspace(1)* %gep.out @@ -247,7 +251,7 @@ %r1 = load volatile half, half addrspace(1)* %gep.0 %r2 = load volatile half, half addrspace(1)* %gep.1 - %r1.fneg = fsub half -0.000000e+00, %r1 + %r1.fneg = fneg half %r1 %r3 = tail call half @llvm.fmuladd.f16(half 2.0, half %r1.fneg, half %r2) store half %r3, half addrspace(1)* %gep.out @@ -434,11 +438,11 @@ ; GFX10-DENORM-CONTRACT: v_fmac_f16_e32 [[REGC]], [[REGA]], [[REGB]] ; GCN-DENORM-STRICT: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]] -; GCN-DENORM-STRICT: v_add_f16_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]] +; GCN-DENORM-STRICT: v_add_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]] ; VI-DENORM: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] ; GFX10-FLUSH: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]] -; GFX10-FLUSH: v_add_f16_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]] +; GFX10-FLUSH: v_add_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]] ; GFX10-FLUSH: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] ; GFX10-DENORM-STRICT: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] ; GFX10-DENORM-CONTRACT: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REGC]] Index: llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll +++ llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll @@ -227,6 +227,10 @@ ret void } +; FIXME: The MAD only folds the FSUB(-0,X) when the FNEG(X) transform +; happens in SelectionDAGBuilder. 
DAGCombiner probably needs to +; be updated to fold the FNEG after visitFSUB(...) runs. + ; XXX ; GCN-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32 ; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]], @@ -254,7 +258,7 @@ %r1 = load volatile float, float addrspace(1)* %gep.0 %r2 = load volatile float, float addrspace(1)* %gep.1 - %r1.fneg = fsub float -0.000000e+00, %r1 + %r1.fneg = fneg float %r1 %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1.fneg, float %r2) store float %r3, float addrspace(1)* %gep.out @@ -287,7 +291,7 @@ %r1 = load volatile float, float addrspace(1)* %gep.0 %r2 = load volatile float, float addrspace(1)* %gep.1 - %r1.fneg = fsub float -0.000000e+00, %r1 + %r1.fneg = fneg float %r1 %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1.fneg, float %r2) store float %r3, float addrspace(1)* %gep.out @@ -478,10 +482,10 @@ ; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], [[REGC]] ; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]] -; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]] +; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]] ; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]] -; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]] +; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]] ; SI-DENORM: buffer_store_dword [[RESULT]] ; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] Index: llvm/test/CodeGen/AMDGPU/fneg-combines.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fneg-combines.ll +++ llvm/test/CodeGen/AMDGPU/fneg-combines.ll @@ -8,6 +8,10 @@ ; fadd tests ; -------------------------------------------------------------------------------- +; FIXME: I think we want to test FNEG(X) folding here. The FSUB(-0,X) case is +; uninteresting. 
Unless these tests should be split into +; GCN-FLUSH/GCN-DENORM checks. + ; GCN-LABEL: {{^}}v_fneg_add_f32: ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] @@ -26,7 +30,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %add = fadd float %a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store float %fneg, float addrspace(1)* %out.gep ret void } @@ -47,7 +51,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %add = fadd float %a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float addrspace(1)* %out store volatile float %add, float addrspace(1)* %out ret void @@ -75,7 +79,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %add = fadd float %a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add %use1 = fmul float %add, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -100,9 +104,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %add = fadd float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float addrspace(1)* %out ret void } @@ -124,9 +128,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.b = fsub float -0.000000e+00, %b + %fneg.b = fneg float %b %add = fadd float %a, %fneg.b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float 
addrspace(1)* %out ret void } @@ -148,10 +152,10 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.b = fsub float -0.000000e+00, %b + %fneg.a = fneg float %a + %fneg.b = fneg float %b %add = fadd float %fneg.a, %fneg.b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float addrspace(1)* %out ret void } @@ -177,9 +181,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %add = fadd float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float addrspace(1)* %out store volatile float %fneg.a, float addrspace(1)* %out ret void @@ -205,9 +209,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %add = fadd float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add %use1 = fmul float %fneg.a, %c store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -226,7 +230,7 @@ %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8 %.i188 = fadd float %tmp9, 0.000000e+00 %tmp10 = fcmp uge float %.i188, %tmp2 - %tmp11 = fsub float -0.000000e+00, %.i188 + %tmp11 = fneg float %.i188 %.i092 = select i1 %tmp10, float %tmp2, float %tmp11 %tmp12 = fcmp ule float %.i092, 0.000000e+00 %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000 @@ -249,7 +253,7 @@ %tmp9 = fmul reassoc nnan arcp contract 
float 0.000000e+00, %tmp8 %.i188 = fadd float %tmp9, 0.000000e+00 %tmp10 = fcmp uge float %.i188, %tmp2 - %tmp11 = fsub float -0.000000e+00, %.i188 + %tmp11 = fneg float %.i188 %.i092 = select i1 %tmp10, float %tmp2, float %tmp11 %tmp12 = fcmp ule float %.i092, 0.000000e+00 %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000 @@ -274,7 +278,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = fmul float %a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store float %fneg, float addrspace(1)* %out.gep ret void } @@ -295,7 +299,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = fmul float %a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out store volatile float %mul, float addrspace(1)* %out ret void @@ -318,7 +322,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = fmul float %a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul %use1 = fmul float %mul, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -338,9 +342,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = fmul float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -358,9 +362,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.b = fsub float -0.000000e+00, %b + %fneg.b 
= fneg float %b %mul = fmul float %a, %fneg.b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -378,10 +382,10 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.b = fsub float -0.000000e+00, %b + %fneg.a = fneg float %a + %fneg.b = fneg float %b %mul = fmul float %fneg.a, %fneg.b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -402,9 +406,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = fmul float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out store volatile float %fneg.a, float addrspace(1)* %out ret void @@ -425,9 +429,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = fmul float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul %use1 = fmul float %fneg.a, %c store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -454,7 +458,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %min = call float @llvm.minnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -466,7 
+470,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #0 { %min = call float @llvm.minnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min ret float %fneg } @@ -482,7 +486,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float %a, float %a) - %min.fneg = fsub float -0.0, %min + %min.fneg = fneg float %min store float %min.fneg, float addrspace(1)* %out.gep ret void } @@ -493,7 +497,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_self_minnum_f32_no_ieee(float %a) #0 { %min = call float @llvm.minnum.f32(float %a, float %a) - %min.fneg = fsub float -0.0, %min + %min.fneg = fneg float %min ret float %min.fneg } @@ -509,7 +513,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float 4.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -520,7 +524,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_posk_minnum_f32_no_ieee(float %a) #0 { %min = call float @llvm.minnum.f32(float 4.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min ret float %fneg } @@ -536,7 +540,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float -4.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -547,7 +551,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_negk_minnum_f32_no_ieee(float %a) #0 { %min = call float @llvm.minnum.f32(float -4.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min ret 
float %fneg } @@ -562,7 +566,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -579,7 +583,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float -0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -602,7 +606,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -624,7 +628,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -730,7 +734,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #0 { %min = call float @llvm.minnum.f32(float -0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min ret float %fneg } @@ -750,7 +754,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %min = call float @llvm.minnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min %mul = fmul float %fneg, %b store float %mul, float addrspace(1)* %out.gep ret void @@ -779,7 +783,7 @@ %a = 
load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min %mul = fmul float %fneg, %b store float %mul, float addrspace(1)* %out.gep ret void @@ -793,7 +797,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #0 { %min = call float @llvm.minnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min %mul = fmul float %fneg, %b ret float %mul } @@ -816,7 +820,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %min = call float @llvm.minnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min %use1 = fmul float %min, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -831,7 +835,7 @@ ; GCN-NEXT: ; return define amdgpu_ps <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #0 { %min = call float @llvm.minnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min %use1 = fmul float %min, 4.0 %ins0 = insertelement <2 x float> undef, float %fneg, i32 0 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1 @@ -859,7 +863,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %max = call float @llvm.maxnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max store float %fneg, float addrspace(1)* %out.gep ret void } @@ -871,7 +875,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #0 { %max = call float @llvm.maxnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max ret float %fneg } @@ -887,7 +891,7 @@ 
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %max = call float @llvm.maxnum.f32(float %a, float %a) - %max.fneg = fsub float -0.0, %max + %max.fneg = fneg float %max store float %max.fneg, float addrspace(1)* %out.gep ret void } @@ -898,7 +902,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_self_maxnum_f32_no_ieee(float %a) #0 { %max = call float @llvm.maxnum.f32(float %a, float %a) - %max.fneg = fsub float -0.0, %max + %max.fneg = fneg float %max ret float %max.fneg } @@ -914,7 +918,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %max = call float @llvm.maxnum.f32(float 4.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max store float %fneg, float addrspace(1)* %out.gep ret void } @@ -925,7 +929,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #0 { %max = call float @llvm.maxnum.f32(float 4.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max ret float %fneg } @@ -941,7 +945,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %max = call float @llvm.maxnum.f32(float -4.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max store float %fneg, float addrspace(1)* %out.gep ret void } @@ -952,7 +956,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #0 { %max = call float @llvm.maxnum.f32(float -4.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max ret float %fneg } @@ -967,7 +971,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %max = call float @llvm.maxnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + 
%fneg = fneg float %max store float %fneg, float addrspace(1)* %out.gep ret void } @@ -984,7 +988,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %max = call float @llvm.maxnum.f32(float -0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max store float %fneg, float addrspace(1)* %out.gep ret void } @@ -995,7 +999,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #0 { %max = call float @llvm.maxnum.f32(float -0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max ret float %fneg } @@ -1015,7 +1019,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %max = call float @llvm.maxnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max %mul = fmul float %fneg, %b store float %mul, float addrspace(1)* %out.gep ret void @@ -1029,7 +1033,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #0 { %max = call float @llvm.maxnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max %mul = fmul float %fneg, %b ret float %mul } @@ -1052,7 +1056,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %max = call float @llvm.maxnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max %use1 = fmul float %max, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -1067,7 +1071,7 @@ ; GCN-NEXT: ; return define amdgpu_ps <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #0 { %max = call float @llvm.maxnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max %use1 = fmul float %max, 4.0 %ins0 = 
insertelement <2 x float> undef, float %fneg, i32 0 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1 @@ -1099,7 +1103,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fma.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1123,7 +1127,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fma.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out store volatile float %fma, float addrspace(1)* %out ret void @@ -1154,7 +1158,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fma.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma %use1 = fmul float %fma, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -1182,9 +1186,9 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1210,9 +1214,9 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.b = fsub float -0.000000e+00, %b + %fneg.b = fneg float %b %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c) - %fneg = fsub float -0.000000e+00, %fma + 
%fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1238,10 +1242,10 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.b = fsub float -0.000000e+00, %b + %fneg.a = fneg float %a + %fneg.b = fneg float %b %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1267,10 +1271,10 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.c = fsub float -0.000000e+00, %c + %fneg.a = fneg float %a + %fneg.c = fneg float %c %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1296,9 +1300,9 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.c = fsub float -0.000000e+00, %c + %fneg.c = fneg float %c %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1330,9 +1334,9 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float 
addrspace(1)* %out store volatile float %fneg.a, float addrspace(1)* %out ret void @@ -1360,9 +1364,9 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma %use1 = fmul float %fneg.a, %d store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -1394,7 +1398,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1446,7 +1450,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma %use1 = fmul float %fma, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -1483,7 +1487,7 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpext = fpext float %fneg.a to double %fneg = fsub double -0.000000e+00, %fpext store double %fneg, double addrspace(1)* %out.gep @@ -1502,7 +1506,7 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a 
= fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpext = fpext float %fneg.a to double %fneg = fsub double -0.000000e+00, %fpext store volatile double %fneg, double addrspace(1)* %out.gep @@ -1559,7 +1563,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile half, half addrspace(1)* %a.gep %fpext = fpext half %a to float - %fneg = fsub float -0.000000e+00, %fpext + %fneg = fneg float %fpext store volatile float %fneg, float addrspace(1)* %out.gep store volatile float %fpext, float addrspace(1)* %out.gep ret void @@ -1573,7 +1577,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile half, half addrspace(1)* %a.gep %fpext = fpext half %a to float - %fneg = fsub float -0.000000e+00, %fpext + %fneg = fneg float %fpext %mul = fmul float %fpext, 4.0 store volatile float %fneg, float addrspace(1)* %out.gep store volatile float %mul, float addrspace(1)* %out.gep @@ -1595,7 +1599,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile double, double addrspace(1)* %a.gep %fpround = fptrunc double %a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1612,7 +1616,7 @@ %a = load volatile double, double addrspace(1)* %a.gep %fneg.a = fsub double -0.000000e+00, %a %fpround = fptrunc double %fneg.a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1631,7 +1635,7 @@ %a = load volatile double, double addrspace(1)* %a.gep %fneg.a = fsub double -0.000000e+00, %a %fpround = fptrunc double %fneg.a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround store volatile float %fneg, float addrspace(1)* %out.gep store volatile double %fneg.a, double addrspace(1)* undef ret void @@ -1652,7 +1656,7 @@ %a = load volatile 
double, double addrspace(1)* %a.gep %fneg.a = fsub double -0.000000e+00, %a %fpround = fptrunc double %fneg.a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround %use1 = fmul double %fneg.a, %c store volatile float %fneg, float addrspace(1)* %out.gep store volatile double %use1, double addrspace(1)* undef @@ -1685,7 +1689,7 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpround = fptrunc float %fneg.a to half %fneg = fsub half -0.000000e+00, %fpround store half %fneg, half addrspace(1)* %out.gep @@ -1705,7 +1709,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile double, double addrspace(1)* %a.gep %fpround = fptrunc double %a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround store volatile float %fneg, float addrspace(1)* %out.gep store volatile float %fpround, float addrspace(1)* %out.gep ret void @@ -1723,7 +1727,7 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpround = fptrunc float %fneg.a to half %fneg = fsub half -0.000000e+00, %fpround store volatile half %fneg, half addrspace(1)* %out.gep @@ -1743,7 +1747,7 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpround = fptrunc float %fneg.a to half %fneg = fsub half -0.000000e+00, %fpround %use1 = fmul 
float %fneg.a, %c @@ -1767,7 +1771,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %rcp = call float @llvm.amdgcn.rcp.f32(float %a) - %fneg = fsub float -0.000000e+00, %rcp + %fneg = fneg float %rcp store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1782,9 +1786,9 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) - %fneg = fsub float -0.000000e+00, %rcp + %fneg = fneg float %rcp store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1801,9 +1805,9 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) - %fneg = fsub float -0.000000e+00, %rcp + %fneg = fneg float %rcp store volatile float %fneg, float addrspace(1)* %out.gep store volatile float %fneg.a, float addrspace(1)* undef ret void @@ -1821,9 +1825,9 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) - %fneg = fsub float -0.000000e+00, %rcp + %fneg = fneg float %rcp %use1 = fmul float %fneg.a, %c store volatile float %fneg, float addrspace(1)* %out.gep store volatile float %use1, float addrspace(1)* undef @@ -1848,7 +1852,7 @@ %a = load volatile float, float 
addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1869,7 +1873,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out store volatile float %mul, float addrspace(1)* %out ret void @@ -1891,7 +1895,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0) store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -1911,9 +1915,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1931,9 +1935,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.b = fsub float -0.000000e+00, %b + %fneg.b = fneg float %b %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* 
%out ret void } @@ -1951,10 +1955,10 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.b = fsub float -0.000000e+00, %b + %fneg.a = fneg float %a + %fneg.b = fneg float %b %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1974,9 +1978,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out store volatile float %fneg.a, float addrspace(1)* %out ret void @@ -1997,9 +2001,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c) store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -2023,7 +2027,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %sin = call float @llvm.sin.f32(float %a) - %fneg = fsub float -0.000000e+00, %sin + %fneg = fneg float %sin store float %fneg, float addrspace(1)* %out.gep ret void } 
@@ -2039,7 +2043,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %sin = call float @llvm.amdgcn.sin.f32(float %a) - %fneg = fsub float -0.0, %sin + %fneg = fneg float %sin store float %fneg, float addrspace(1)* %out.gep ret void } @@ -2059,7 +2063,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %trunc = call float @llvm.trunc.f32(float %a) - %fneg = fsub float -0.0, %trunc + %fneg = fneg float %trunc store float %fneg, float addrspace(1)* %out.gep ret void } @@ -2086,7 +2090,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %round = call float @llvm.round.f32(float %a) - %fneg = fsub float -0.0, %round + %fneg = fneg float %round store float %fneg, float addrspace(1)* %out.gep ret void } @@ -2106,7 +2110,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %rint = call float @llvm.rint.f32(float %a) - %fneg = fsub float -0.0, %rint + %fneg = fneg float %rint store float %fneg, float addrspace(1)* %out.gep ret void } @@ -2126,7 +2130,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %nearbyint = call float @llvm.nearbyint.f32(float %a) - %fneg = fsub float -0.0, %nearbyint + %fneg = fneg float %nearbyint store float %fneg, float addrspace(1)* %out.gep ret void } @@ -2146,7 +2150,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %trunc = call float @llvm.canonicalize.f32(float %a) - %fneg = fsub float -0.0, %trunc + %fneg = fneg float %trunc store float %fneg, float addrspace(1)* %out.gep ret void } @@ -2170,7 +2174,7 @@ %a = load volatile float, 
float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = fmul float %a, %b - %fneg = fsub float -0.0, %mul + %fneg = fneg float %mul %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0) %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0) store volatile float %intrp0, float addrspace(1)* %out.gep @@ -2193,7 +2197,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = fmul float %a, %b - %fneg = fsub float -0.0, %mul + %fneg = fneg float %mul %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0) %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0) store volatile float %intrp0, float addrspace(1)* %out.gep @@ -2230,7 +2234,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %mul = fmul float %a, %b - %fneg = fsub float -0.0, %mul + %fneg = fneg float %mul %cmp0 = icmp eq i32 %d, 0 br i1 %cmp0, label %if, label %endif @@ -2266,7 +2270,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %mul = fmul float %a, %b - %fneg = fsub float -0.0, %mul + %fneg = fneg float %mul call void asm sideeffect "; use $0", "v"(float %fneg) #0 store volatile float %fneg, float addrspace(1)* %out.gep ret void @@ -2295,7 +2299,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %mul = fmul float %a, %b - %fneg = fsub float -0.0, %mul + %fneg = fneg float %mul call void asm sideeffect "; use $0", "v"(float %fneg) #0 store volatile float %mul, float addrspace(1)* %out.gep ret void @@ -2328,7 +2332,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.0, %a + %fneg.a = fneg float %a %fma0 = call float @llvm.fma.f32(float 
%fneg.a, float %b, float %c) %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0) @@ -2360,7 +2364,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.0, %a + %fneg.a = fneg float %a %mul0 = fmul float %fneg.a, %b %mul1 = fmul float %fneg.a, %c @@ -2391,7 +2395,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.0, %a + %fneg.a = fneg float %a %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0) %mul1 = fmul float %fneg.a, %c @@ -2433,7 +2437,7 @@ %d = load volatile float, float addrspace(1)* %d.gep %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0) - %fneg.fma0 = fsub float -0.0, %fma0 + %fneg.fma0 = fneg float %fma0 %mul1 = fmul float %fneg.fma0, %c %mul2 = fmul float %fneg.fma0, %d @@ -2501,7 +2505,7 @@ %d = load volatile float, float addrspace(1)* %d.gep %trunc.a = call float @llvm.trunc.f32(float %a) - %trunc.fneg.a = fsub float -0.0, %trunc.a + %trunc.fneg.a = fneg float %trunc.a %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c) store volatile float %fma0, float addrspace(1)* %out ret void @@ -2531,7 +2535,7 @@ %d = load volatile float, float addrspace(1)* %d.gep %trunc.a = call float @llvm.trunc.f32(float %a) - %trunc.fneg.a = fsub float -0.0, %trunc.a + %trunc.fneg.a = fneg float %trunc.a %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c) %mul1 = fmul float %trunc.a, %d store volatile float %fma0, float addrspace(1)* %out Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll @@ -32,7 +32,7 @@ ; GCN-LABEL: {{^}}test_fneg_fmed3_multi_use: ; GCN: v_med3_f32 [[MED3:v[0-9]+]], -s{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}} -; GCN: v_mul_f32_e32 
v{{[0-9]+}}, -4.0, [[MED3]] +; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[MED3]], 4.0 define amdgpu_kernel void @test_fneg_fmed3_multi_use(float addrspace(1)* %out, float %src0, float %src1, float %src2) #1 { %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float %src2) %neg.med3 = fsub float -0.0, %med3 Index: llvm/test/CodeGen/AMDGPU/selectcc-opt.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/selectcc-opt.ll +++ llvm/test/CodeGen/AMDGPU/selectcc-opt.ll @@ -2,6 +2,8 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; FIXME: Not sure what to do about these tests. The FSUB(-0.0,X) is being +; folded into the select, before there's a chance to convert to FNEG(X). ; FUNC-LABEL: {{^}}test_a: ; EG-NOT: CND @@ -11,7 +13,7 @@ entry: %0 = fcmp olt float %in, 0.000000e+00 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 %4 = bitcast i32 %3 to float %5 = bitcast float %4 to i32 @@ -39,7 +41,7 @@ entry: %0 = fcmp olt float %in, 0.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 %4 = bitcast i32 %3 to float %5 = bitcast float %4 to i32 Index: llvm/test/CodeGen/AMDGPU/set-dx10.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/set-dx10.ll +++ llvm/test/CodeGen/AMDGPU/set-dx10.ll @@ -4,6 +4,9 @@ ; to store integer true (-1) and false (0) values are lowered to one of the ; SET*DX10 instructions. +; FIXME: Not sure what to do about these tests. The FSUB(-0.0,X) is being +; folded into the select, before there's a chance to convert to FNEG(X). 
+ ; CHECK: {{^}}fcmp_une_select_fptosi: ; CHECK: LSHR ; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, @@ -12,7 +15,7 @@ entry: %0 = fcmp une float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -38,7 +41,7 @@ entry: %0 = fcmp oeq float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -64,7 +67,7 @@ entry: %0 = fcmp ogt float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -90,7 +93,7 @@ entry: %0 = fcmp oge float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -116,7 +119,7 @@ entry: %0 = fcmp ole float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -142,7 +145,7 @@ entry: %0 = fcmp olt float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void