// Patch summary: on subtargets without CMOV, floating-point X86ISD::CMP,
// STRICT_FCMP and STRICT_FCMPS are now expanded in the instruction selector
// (X86ISelDAGToDAG.cpp) into UCOM_Fpr*/COM_Fpr* -> FNSTSW16r -> extract AH ->
// SAHF. This replaces X86TargetLowering::ConvertCmpIfNecessary, which is
// removed together with the call sites shown below. The X86ISD::FNSTSW16r and
// X86ISD::SAHF opcodes (enum entries, node names, SDNode defs) are deleted, and
// the now pattern-less UCOM_Fpr*/COM_Fpr*, FNSTSW16r and SAHF instruction
// definitions gain hasSideEffects = 0.
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -5037,17 +5037,83 @@ return; } - case X86ISD::CMP: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); + case X86ISD::CMP: + case X86ISD::STRICT_FCMP: + case X86ISD::STRICT_FCMPS: { + bool IsStrictCmp = Node->getOpcode() == X86ISD::STRICT_FCMP || + Node->getOpcode() == X86ISD::STRICT_FCMPS; + SDValue N0 = Node->getOperand(IsStrictCmp ? 1 : 0); + SDValue N1 = Node->getOperand(IsStrictCmp ? 2 : 1); + + // Save the original VT of the compare. + MVT CmpVT = N0.getSimpleValueType(); + + // Floating point needs special handling if we don't have FCOMI. + // FIXME: Should we have an X86ISD::FCMP to avoid mixing int and fp? + if (CmpVT.isFloatingPoint()) { + if (Subtarget->hasCMov()) + break; + + bool IsSignaling = Node->getOpcode() == X86ISD::STRICT_FCMPS; + + unsigned Opc; + switch (CmpVT.SimpleTy) { + default: llvm_unreachable("Unexpected type!"); + case MVT::f32: + Opc = IsSignaling ? X86::COM_Fpr32 : X86::UCOM_Fpr32; + break; + case MVT::f64: + Opc = IsSignaling ? X86::COM_Fpr64 : X86::UCOM_Fpr64; + break; + case MVT::f80: + Opc = IsSignaling ? X86::COM_Fpr80 : X86::UCOM_Fpr80; + break; + } + + SDValue Cmp; + SDValue Chain = + IsStrictCmp ? Node->getOperand(0) : CurDAG->getEntryNode(); + if (IsStrictCmp) { + SDVTList VTs = CurDAG->getVTList(MVT::i16, MVT::Other); + Cmp = SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {N0, N1, Chain}), 0); + Chain = Cmp.getValue(1); + } else { + Cmp = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i16, N0, N1), 0); + } + + // Move FPSW to AX via FNSTSW16r. + SDValue FPSW = CurDAG->getCopyToReg(Chain, dl, X86::FPSW, Cmp, SDValue()); + Chain = FPSW; + SDValue FNSTSW = + SDValue(CurDAG->getMachineNode(X86::FNSTSW16r, dl, MVT::i16, FPSW, + FPSW.getValue(1)), + 0); + + // Extract the upper 8 bits of AX (AH). 
+ SDValue Extract = + CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, FNSTSW); + + // Move AH into EFLAGS via SAHF. + // Some 64-bit targets lack SAHF support, but they do support FCOMI. + assert(Subtarget->hasLAHFSAHF() && + "Target doesn't support SAHF or FCOMI?"); + SDValue AH = CurDAG->getCopyToReg(Chain, dl, X86::AH, Extract, SDValue()); + Chain = AH; + SDValue SAHF = SDValue( + CurDAG->getMachineNode(X86::SAHF, dl, MVT::i32, AH.getValue(1)), 0); + + if (IsStrictCmp) + ReplaceUses(SDValue(Node, 1), Chain); + + ReplaceUses(SDValue(Node, 0), SAHF); + CurDAG->RemoveDeadNode(Node); + return; + } // Optimizations for TEST compares. if (!isNullConstant(N1)) break; - // Save the original VT of the compare. - MVT CmpVT = N0.getSimpleValueType(); - // If we are comparing (and (shr X, C, Mask) with 0, emit a BEXTR followed // by a test instruction. The test should be removed later by // analyzeCompare if we are using only the zero flag. diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -541,12 +541,6 @@ MEMBARRIER, MFENCE, - // Store FP status word into i16 register. - FNSTSW16r, - - // Store contents of %ah into %eflags. - SAHF, - // Get a random integer and indicate whether it is valid in CF. RDRAND, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20947,36 +20947,6 @@ return std::make_pair(Sub.getValue(1), SDValue()); } -/// Convert a comparison if required by the subtarget. -SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp, - SelectionDAG &DAG) const { - // If the subtarget does not support the FUCOMI instruction, floating-point - // comparisons have to be converted. 
- bool IsCmp = Cmp.getOpcode() == X86ISD::CMP; - bool IsStrictCmp = Cmp.getOpcode() == X86ISD::STRICT_FCMP || - Cmp.getOpcode() == X86ISD::STRICT_FCMPS; - - if (Subtarget.hasCMov() || (!IsCmp && !IsStrictCmp) || - !Cmp.getOperand(IsStrictCmp ? 1 : 0).getValueType().isFloatingPoint() || - !Cmp.getOperand(IsStrictCmp ? 2 : 1).getValueType().isFloatingPoint()) - return Cmp; - - // The instruction selector will select an FUCOM instruction instead of - // FUCOMI, which writes the comparison result to FPSW instead of EFLAGS. Hence - // build an SDNode sequence that transfers the result from FPSW into EFLAGS: - // (X86sahf (trunc (srl (X86fp_stsw (trunc (X86any_fcmp ...)), 8)))) - SDLoc dl(Cmp); - SDValue TruncFPSW = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Cmp); - SDValue FNStSW = DAG.getNode(X86ISD::FNSTSW16r, dl, MVT::i16, TruncFPSW); - SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW, - DAG.getConstant(8, dl, MVT::i8)); - SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl); - - // Some 64-bit targets lack SAHF support, but they do support FCOMI. - assert(Subtarget.hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?"); - return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl); -} - /// Check if replacement of SQRT with RSQRT should be disabled. bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); @@ -21940,7 +21910,6 @@ SDValue EFLAGS = Tmp.first; if (Chain) Chain = Tmp.second; - EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG); X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8); return EFLAGS; } @@ -22080,8 +22049,7 @@ /// Return true if opcode is a X86 logical comparison. 
static bool isX86LogicalCmp(SDValue Op) { unsigned Opc = Op.getOpcode(); - if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI || - Opc == X86ISD::SAHF) + if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) return true; if (Op.getResNo() == 1 && (Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC || @@ -23033,7 +23001,6 @@ SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, Cond.getOperand(0), Cond.getOperand(1)); - Cmp = ConvertCmpIfNecessary(Cmp, DAG); CC = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8); Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cmp); @@ -23049,7 +23016,6 @@ // separate test. SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, Cond.getOperand(0), Cond.getOperand(1)); - Cmp = ConvertCmpIfNecessary(Cmp, DAG); CC = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8); Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cmp); @@ -23081,7 +23047,6 @@ CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8); Cond = EmitTest(Cond, X86Cond, dl, DAG, Subtarget); } - Cond = ConvertCmpIfNecessary(Cond, DAG); return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cond); } @@ -29729,7 +29694,6 @@ NODE_NAME_CASE(EH_RETURN) NODE_NAME_CASE(TC_RETURN) NODE_NAME_CASE(FNSTCW16m) - NODE_NAME_CASE(FNSTSW16r) NODE_NAME_CASE(LCMPXCHG_DAG) NODE_NAME_CASE(LCMPXCHG8_DAG) NODE_NAME_CASE(LCMPXCHG16_DAG) @@ -29854,7 +29818,6 @@ NODE_NAME_CASE(MEMBARRIER) NODE_NAME_CASE(MFENCE) NODE_NAME_CASE(SEG_ALLOCA) - NODE_NAME_CASE(SAHF) NODE_NAME_CASE(RDRAND) NODE_NAME_CASE(RDSEED) NODE_NAME_CASE(RDPKRU) diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td --- a/llvm/lib/Target/X86/X86InstrFPStack.td +++ b/llvm/lib/Target/X86/X86InstrFPStack.td @@ -22,7 +22,6 @@ SDTCisPtrTy<1>]>; def SDTX86Fild : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>; def SDTX86Fist : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>; -def 
SDTX86Fnstsw : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>; def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; @@ -34,7 +33,6 @@ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def X86fist : SDNode<"X86ISD::FIST", SDTX86Fist, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; -def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>; def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore, @@ -627,19 +625,13 @@ } // SchedRW // Floating point compares. -let SchedRW = [WriteFCom], Uses = [FPCW] in { -def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, - [(set FPSW, (trunc (X86any_fcmp RFP32:$lhs, RFP32:$rhs)))]>; -def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, - [(set FPSW, (trunc (X86any_fcmp RFP64:$lhs, RFP64:$rhs)))]>; -def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, - [(set FPSW, (trunc (X86any_fcmp RFP80:$lhs, RFP80:$rhs)))]>; -def COM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, - [(set FPSW, (trunc (X86strict_fcmps RFP32:$lhs, RFP32:$rhs)))]>; -def COM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, - [(set FPSW, (trunc (X86strict_fcmps RFP64:$lhs, RFP64:$rhs)))]>; -def COM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, - [(set FPSW, (trunc (X86strict_fcmps RFP80:$lhs, RFP80:$rhs)))]>; +let SchedRW = [WriteFCom], Uses = [FPCW], hasSideEffects = 0 in { +def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, []>; +def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, []>; +def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, []>; +def COM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, []>; +def COM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, []>; +def COM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), 
CompareFP, []>; } // SchedRW } // mayRaiseFPException = 1 @@ -690,10 +682,9 @@ // Floating point flag ops. let SchedRW = [WriteALU] in { -let Defs = [AX, FPSW], Uses = [FPSW] in +let Defs = [AX, FPSW], Uses = [FPSW], hasSideEffects = 0 in def FNSTSW16r : I<0xDF, MRM_E0, // AX = fp flags - (outs), (ins), "fnstsw\t{%ax|ax}", - [(set AX, (X86fp_stsw FPSW))]>; + (outs), (ins), "fnstsw\t{%ax|ax}", []>; let Defs = [FPSW], Uses = [FPCW] in def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control world (outs), (ins i16mem:$dst), "fnstcw\t$dst", diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -152,8 +152,6 @@ def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>; def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>; -def X86sahf : SDNode<"X86ISD::SAHF", SDTX86sahf>; - def X86rdrand : SDNode<"X86ISD::RDRAND", SDTX86rdrand, [SDNPHasChain, SDNPSideEffect]>; @@ -1787,9 +1785,8 @@ // Condition code ops, incl. set if equal/not equal/... let SchedRW = [WriteLAHFSAHF] in { -let Defs = [EFLAGS], Uses = [AH] in -def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", - [(set EFLAGS, (X86sahf AH))]>, +let Defs = [EFLAGS], Uses = [AH], hasSideEffects = 0 in +def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>, // flags = AH Requires<[HasLAHFSAHF]>; let Defs = [AH], Uses = [EFLAGS], hasSideEffects = 0 in def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>, // AH = flags