diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -937,11 +937,16 @@ BUILTIN_OP_END }; + /// FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations + /// which cannot raise FP exceptions should be less than this value. + /// Those that do must not be less than this value. + static const int FIRST_TARGET_STRICTFP_OPCODE = BUILTIN_OP_END+400; + /// FIRST_TARGET_MEMORY_OPCODE - Target-specific pre-isel operations /// which do not reference a specific memory location should be less than /// this value. Those that do must not be less than this value, and can /// be used with SelectionDAG::getMemIntrinsicNode. - static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+400; + static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+500; //===--------------------------------------------------------------------===// /// MemIndexedMode enum - This enum defines the load / store indexed diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -310,6 +310,9 @@ return false; } + /// Return whether the node may raise an FP exception. 
+ bool mayRaiseFPException(SDNode *Node) const; + bool isOrEquivalentToAdd(const SDNode *N) const; private: diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -387,7 +387,7 @@ Exact(false), NoNaNs(false), NoInfs(false), NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false), AllowContract(false), ApproximateFuncs(false), - AllowReassociation(false), NoFPExcept(true) {} + AllowReassociation(false), NoFPExcept(false) {} /// Propagate the fast-math-flags from an IR FPMathOperator. void copyFMF(const FPMathOperator &FPMO) { @@ -450,9 +450,9 @@ setDefined(); AllowReassociation = b; } - void setFPExcept(bool b) { + void setNoFPExcept(bool b) { setDefined(); - NoFPExcept = !b; + NoFPExcept = b; } // These are accessors for each flag. @@ -467,7 +467,7 @@ bool hasAllowContract() const { return AllowContract; } bool hasApproximateFuncs() const { return ApproximateFuncs; } bool hasAllowReassociation() const { return AllowReassociation; } - bool hasFPExcept() const { return !NoFPExcept; } + bool hasNoFPExcept() const { return NoFPExcept; } bool isFast() const { return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs && NoFPExcept && @@ -666,6 +666,15 @@ /// \ISD namespace). bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; } + /// Test if this node has a target-specific opcode that may raise + /// FP exceptions (in the \ISD namespace and not less than + /// FIRST_TARGET_STRICTFP_OPCODE). Note that all target memory + /// opcodes are currently automatically considered to possibly raise + /// FP exceptions as well. + bool isTargetStrictFPOpcode() const { + return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE; + } + /// Test if this node has a target-specific /// memory-referencing opcode (in the \ISD namespace and /// greater than FIRST_TARGET_MEMORY_OPCODE). 
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -882,7 +882,7 @@ if (Flags.hasExact()) MI->setFlag(MachineInstr::MIFlag::IsExact); - if (Flags.hasFPExcept()) + if (MI->getDesc().mayRaiseFPException() && !Flags.hasNoFPExcept()) MI->setFlag(MachineInstr::MIFlag::FPExcept); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1108,6 +1108,15 @@ Node->intersectFlagsWith(IncomingFlags); } } + // Constrained FP intrinsics with fpexcept.ignore should also get + // the NoFPExcept flag. + if (auto *FPI = dyn_cast<ConstrainedFPIntrinsic>(&I)) + if (FPI->getExceptionBehavior() == fp::ExceptionBehavior::ebIgnore) + if (SDNode *Node = getNodeForIRValue(&I)) { + SDNodeFlags Flags = Node->getFlags(); + Flags.setNoFPExcept(true); + Node->setFlags(Flags); + } if (!I.isTerminator() && !HasTailCall && !isStatepoint(&I)) // statepoints handle their exports internally @@ -6972,12 +6981,6 @@ SDVTList VTs = DAG.getVTList(ValueVTs); SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers); - if (FPI.getExceptionBehavior() != fp::ExceptionBehavior::ebIgnore) { - SDNodeFlags Flags; - Flags.setFPExcept(true); - Result->setFlags(Flags); - } - assert(Result.getNode()->getNumValues() == 2); // See above -- chain is handled like for loads here. 
SDValue OutChain = Result.getValue(1); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -547,8 +547,8 @@ if (getFlags().hasVectorReduction()) OS << " vector-reduction"; - if (getFlags().hasFPExcept()) - OS << " fpexcept"; + if (getFlags().hasNoFPExcept()) + OS << " nofpexcept"; if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { if (!MN->memoperands_empty()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -3458,6 +3458,17 @@ if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr) Ops.push_back(InputGlue); + // Check whether any matched node could raise an FP exception. Since all + // such nodes must have a chain, it suffices to check ChainNodesMatched. + // We need to perform this check before potentially modifying one of the + // nodes via MorphNode. + bool MayRaiseFPException = false; + for (auto *N : ChainNodesMatched) + if (mayRaiseFPException(N) && !N->getFlags().hasNoFPExcept()) { + MayRaiseFPException = true; + break; + } + // Create the node. MachineSDNode *Res = nullptr; bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo || @@ -3489,6 +3500,14 @@ Ops, EmitNodeInfo)); } + // Set the NoFPExcept flag when no original matched node could + // raise an FP exception, but the new node potentially might. + if (!MayRaiseFPException && mayRaiseFPException(Res)) { + SDNodeFlags Flags = Res->getFlags(); + Flags.setNoFPExcept(true); + Res->setFlags(Flags); + } + // If the node had chain/glue results, update our notion of the current // chain and glue. if (EmitNodeInfo & OPFL_GlueOutput) { @@ -3644,6 +3663,21 @@ } } +/// Return whether the node may raise an FP exception. 
+bool SelectionDAGISel::mayRaiseFPException(SDNode *N) const { + // For machine opcodes, consult the MCID flag. + if (N->isMachineOpcode()) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + return MCID.mayRaiseFPException(); + } + + // For ISD opcodes, only StrictFP opcodes may raise an FP + // exception. + if (N->isTargetOpcode()) + return N->isTargetStrictFPOpcode(); + return N->isStrictFPOpcode(); +} + bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const { assert(N->getOpcode() == ISD::OR && "Unexpected opcode"); auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6190,8 +6190,10 @@ // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can // never raise any exception. SDNodeFlags Flags; - Flags.setFPExcept(Node->getFlags().hasFPExcept()); + Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept()); Fast->setFlags(Flags); + Flags.setNoFPExcept(true); + Slow->setFlags(Flags); } else { SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or); Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -58,8 +58,7 @@ ICMP, // Floating-point comparisons. The two operands are the values to compare. - // Regular and strict (quiet and signaling) versions. - FCMP, STRICT_FCMP, STRICT_FCMPS, + FCMP, // Test under mask. The first operand is ANDed with the second operand // and the condition codes are set on the result. The third operand is @@ -249,10 +248,9 @@ // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1 // vector result. 
VFCMPE is for "ordered and equal", VFCMPH for "ordered and // greater than" and VFCMPHE for "ordered and greater than or equal to". - // Regular and strict (quiet and signaling) versions. - VFCMPE, STRICT_VFCMPE, STRICT_VFCMPES, - VFCMPH, STRICT_VFCMPH, STRICT_VFCMPHS, - VFCMPHE, STRICT_VFCMPHE, STRICT_VFCMPHES, + VFCMPE, + VFCMPH, + VFCMPHE, // Likewise, but also set the condition codes on the result. VFCMPES, @@ -263,12 +261,12 @@ VFTCI, // Extend the even f32 elements of vector operand 0 to produce a vector - // of f64 elements. Regular and strict versions. - VEXTEND, STRICT_VEXTEND, + // of f64 elements. + VEXTEND, // Round the f64 elements of vector operand 0 to f32s and store them in the - // even elements of the result. Regular and strict versions. - VROUND, STRICT_VROUND, + // even elements of the result. + VROUND, // AND the two vector operands together and set CC based on the result. VTM, @@ -292,6 +290,24 @@ // Operand 1: the bit mask TDC, + // Strict variants of scalar floating-point comparisons. + // Quiet and signaling versions. + STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, + STRICT_FCMPS, + + // Strict variants of vector floating-point comparisons. + // Quiet and signaling versions. + STRICT_VFCMPE, + STRICT_VFCMPH, + STRICT_VFCMPHE, + STRICT_VFCMPES, + STRICT_VFCMPHS, + STRICT_VFCMPHES, + + // Strict variants of VEXTEND and VROUND. + STRICT_VEXTEND, + STRICT_VROUND, + // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or // ATOMIC_LOAD_<op>. // diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -79,9 +79,6 @@ /// X86 compare and logical compare instructions. CMP, COMI, UCOMI, - /// X86 strict FP compare instructions. - STRICT_FCMP, STRICT_FCMPS, - /// X86 bit-test instructions. BT, @@ -325,7 +322,6 @@ // Vector packed double/float comparison. CMPP, - STRICT_CMPP, // Vector integer comparisons. 
PCMPEQ, PCMPGT, @@ -338,7 +334,6 @@ /// Vector comparison generating mask bits for fp and /// integer signed and unsigned data types. CMPM, - STRICT_CMPM, // Vector comparison with SAE for FP values CMPM_SAE, @@ -506,7 +501,6 @@ // Vector float/double to signed/unsigned integer with truncation. CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE, - STRICT_CVTTP2SI, STRICT_CVTTP2UI, // Scalar float/double to signed/unsigned integer with truncation. CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE, @@ -605,6 +599,20 @@ // For avx512-vp2intersect VP2INTERSECT, + /// X86 strict FP compare instructions. + STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, + STRICT_FCMPS, + + // Vector packed double/float comparison. + STRICT_CMPP, + + /// Vector comparison generating mask bits for fp and + /// integer signed and unsigned data types. + STRICT_CMPM, + + // Vector float/double to signed/unsigned integer with truncation. + STRICT_CVTTP2SI, STRICT_CVTTP2UI, + // Compare and swap. LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, LCMPXCHG8_DAG, diff --git a/llvm/test/CodeGen/X86/fp-intrinsics-flags-x86_64.ll b/llvm/test/CodeGen/X86/fp-intrinsics-flags-x86_64.ll --- a/llvm/test/CodeGen/X86/fp-intrinsics-flags-x86_64.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics-flags-x86_64.ll @@ -4,7 +4,7 @@ ; CHECK-LABEL: name: f20u ; CHECK: liveins: $xmm0 ; CHECK: [[COPY:%[0-9]+]]:fr64 = COPY $xmm0 -; CHECK: [[CVTTSD2SI64rr:%[0-9]+]]:gr64 = CVTTSD2SI64rr [[COPY]], implicit $mxcsr +; CHECK: [[CVTTSD2SI64rr:%[0-9]+]]:gr64 = fpexcept CVTTSD2SI64rr [[COPY]], implicit $mxcsr ; CHECK: [[COPY1:%[0-9]+]]:gr32 = COPY [[CVTTSD2SI64rr]].sub_32bit ; CHECK: $eax = COPY [[COPY1]] ; CHECK: RET 0, $eax diff --git a/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll b/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll --- a/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll @@ -29,14 +29,14 @@ ; CHECK-LABEL: name: f20u64 ; CHECK: [[MOVSDrm_alt:%[0-9]+]]:fr64 = MOVSDrm_alt 
%fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0, align 16) ; CHECK: [[MOVSDrm_alt1:%[0-9]+]]:fr64 = MOVSDrm_alt $noreg, 1, $noreg, %const.0, $noreg :: (load 8 from constant-pool) -; CHECK: COMISDrr [[MOVSDrm_alt1]], [[MOVSDrm_alt]], implicit-def $eflags, implicit $mxcsr +; CHECK: fpexcept COMISDrr [[MOVSDrm_alt1]], [[MOVSDrm_alt]], implicit-def $eflags, implicit $mxcsr ; CHECK: [[FsFLD0SD:%[0-9]+]]:fr64 = FsFLD0SD ; CHECK: JCC_1 ; CHECK: [[PHI:%[0-9]+]]:fr64 = PHI [[MOVSDrm_alt1]], {{.*}}, [[FsFLD0SD]], {{.*}} -; CHECK: [[SUBSDrr:%[0-9]+]]:fr64 = SUBSDrr [[MOVSDrm_alt]], killed [[PHI]], implicit $mxcsr +; CHECK: [[SUBSDrr:%[0-9]+]]:fr64 = fpexcept SUBSDrr [[MOVSDrm_alt]], killed [[PHI]], implicit $mxcsr ; CHECK: MOVSDmr %stack.0, 1, $noreg, 0, $noreg, killed [[SUBSDrr]] :: (store 8 into %stack.0) ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 6, implicit $eflags -; CHECK: [[LD_Fp64m:%[0-9]+]]:rfp64 = LD_Fp64m %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 8 from %stack.0) +; CHECK: [[LD_Fp64m:%[0-9]+]]:rfp64 = fpexcept LD_Fp64m %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 8 from %stack.0) ; CHECK: FNSTCW16m %stack.1, 1, $noreg, 0, $noreg, implicit-def $fpsw, implicit $fpcw :: (store 2 into %stack.1) ; CHECK: [[MOVZX32rm16_:%[0-9]+]]:gr32 = MOVZX32rm16 %stack.1, 1, $noreg, 0, $noreg :: (load 2 from %stack.1) ; CHECK: [[OR32ri:%[0-9]+]]:gr32 = OR32ri killed [[MOVZX32rm16_]], 3072, implicit-def $eflags @@ -59,7 +59,7 @@ define i8 @f20s8(double %x) #0 { entry: ; CHECK-LABEL: name: f20s8 -; CHECK: [[CVTTSD2SIrm:%[0-9]+]]:gr32 = CVTTSD2SIrm %fixed-stack.0, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load 8 from %fixed-stack.0, align 16) +; CHECK: [[CVTTSD2SIrm:%[0-9]+]]:gr32 = fpexcept CVTTSD2SIrm %fixed-stack.0, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load 8 from %fixed-stack.0, align 16) ; CHECK: [[COPY:%[0-9]+]]:gr32_abcd = COPY [[CVTTSD2SIrm]] ; CHECK: [[COPY1:%[0-9]+]]:gr8 = COPY 
[[COPY]].sub_8bit ; CHECK: $al = COPY [[COPY1]] @@ -71,7 +71,7 @@ define i16 @f20s16(double %x) #0 { entry: ; CHECK-LABEL: name: f20s16 -; CHECK: [[CVTTSD2SIrm:%[0-9]+]]:gr32 = CVTTSD2SIrm %fixed-stack.0, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load 8 from %fixed-stack.0, align 16) +; CHECK: [[CVTTSD2SIrm:%[0-9]+]]:gr32 = fpexcept CVTTSD2SIrm %fixed-stack.0, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load 8 from %fixed-stack.0, align 16) ; CHECK: [[COPY:%[0-9]+]]:gr16 = COPY [[CVTTSD2SIrm]].sub_16bit ; CHECK: $ax = COPY [[COPY]] ; CHECK: RET 0, $ax @@ -84,15 +84,15 @@ ; CHECK-LABEL: name: f20u ; CHECK: [[MOVSDrm_alt:%[0-9]+]]:fr64 = MOVSDrm_alt %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0, align 16) ; CHECK: [[MOVSDrm_alt1:%[0-9]+]]:fr64 = MOVSDrm_alt $noreg, 1, $noreg, %const.0, $noreg :: (load 8 from constant-pool) -; CHECK: COMISDrr [[MOVSDrm_alt1]], [[MOVSDrm_alt]], implicit-def $eflags, implicit $mxcsr +; CHECK: fpexcept COMISDrr [[MOVSDrm_alt1]], [[MOVSDrm_alt]], implicit-def $eflags, implicit $mxcsr ; CHECK: [[FsFLD0SD:%[0-9]+]]:fr64 = FsFLD0SD ; CHECK: JCC_1 ; CHECK: [[PHI:%[0-9]+]]:fr64 = PHI [[MOVSDrm_alt1]], {{.*}}, [[FsFLD0SD]], {{.*}} ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 6, implicit $eflags ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]] ; CHECK: [[SHL32ri:%[0-9]+]]:gr32 = SHL32ri [[MOVZX32rr8_]], 31, implicit-def dead $eflags -; CHECK: [[SUBSDrr:%[0-9]+]]:fr64 = SUBSDrr [[MOVSDrm_alt]], killed [[PHI]], implicit $mxcsr -; CHECK: [[CVTTSD2SIrr:%[0-9]+]]:gr32 = CVTTSD2SIrr killed [[SUBSDrr]], implicit $mxcsr +; CHECK: [[SUBSDrr:%[0-9]+]]:fr64 = fpexcept SUBSDrr [[MOVSDrm_alt]], killed [[PHI]], implicit $mxcsr +; CHECK: [[CVTTSD2SIrr:%[0-9]+]]:gr32 = fpexcept CVTTSD2SIrr killed [[SUBSDrr]], implicit $mxcsr ; CHECK: [[XOR32rr:%[0-9]+]]:gr32 = XOR32rr [[CVTTSD2SIrr]], killed [[SHL32ri]], implicit-def dead $eflags ; CHECK: $eax = COPY [[XOR32rr]] ; CHECK: RET 0, $eax diff --git 
a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics-flags.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics-flags.ll --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics-flags.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics-flags.ll @@ -3,7 +3,7 @@ define <1 x float> @constrained_vector_fadd_v1f32() #0 { ; CHECK-LABEL: name: constrained_vector_fadd_v1f32 ; CHECK: [[MOVSSrm_alt:%[0-9]+]]:fr32 = MOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool) -; CHECK: [[ADDSSrm:%[0-9]+]]:fr32 = ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load 4 from constant-pool) +; CHECK: [[ADDSSrm:%[0-9]+]]:fr32 = fpexcept ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load 4 from constant-pool) ; CHECK: $xmm0 = COPY [[ADDSSrm]] ; CHECK: RET 0, $xmm0 entry: @@ -15,9 +15,9 @@ ; CHECK-LABEL: name: constrained_vector_fadd_v3f32 ; CHECK: [[FsFLD0SS:%[0-9]+]]:fr32 = FsFLD0SS ; CHECK: [[MOVSSrm_alt:%[0-9]+]]:fr32 = MOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool) -; CHECK: [[ADDSSrr:%[0-9]+]]:fr32 = ADDSSrr [[MOVSSrm_alt]], killed [[FsFLD0SS]], implicit $mxcsr -; CHECK: [[ADDSSrm:%[0-9]+]]:fr32 = ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load 4 from constant-pool) -; CHECK: [[ADDSSrm1:%[0-9]+]]:fr32 = ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.2, $noreg, implicit $mxcsr :: (load 4 from constant-pool) +; CHECK: [[ADDSSrr:%[0-9]+]]:fr32 = fpexcept ADDSSrr [[MOVSSrm_alt]], killed [[FsFLD0SS]], implicit $mxcsr +; CHECK: [[ADDSSrm:%[0-9]+]]:fr32 = fpexcept ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load 4 from constant-pool) +; CHECK: [[ADDSSrm1:%[0-9]+]]:fr32 = fpexcept ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.2, $noreg, implicit $mxcsr :: (load 4 from constant-pool) ; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY [[ADDSSrm1]] ; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY 
[[ADDSSrm]] ; CHECK: [[UNPCKLPSrr:%[0-9]+]]:vr128 = UNPCKLPSrr [[COPY1]], killed [[COPY]] @@ -38,8 +38,8 @@ define <4 x double> @constrained_vector_fadd_v4f64() #0 { ; CHECK-LABEL: name: constrained_vector_fadd_v4f64 ; CHECK: [[MOVAPDrm:%[0-9]+]]:vr128 = MOVAPDrm $rip, 1, $noreg, %const.0, $noreg :: (load 16 from constant-pool) -; CHECK: [[ADDPDrm:%[0-9]+]]:vr128 = ADDPDrm [[MOVAPDrm]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load 16 from constant-pool) -; CHECK: [[ADDPDrm1:%[0-9]+]]:vr128 = ADDPDrm [[MOVAPDrm]], $rip, 1, $noreg, %const.2, $noreg, implicit $mxcsr :: (load 16 from constant-pool) +; CHECK: [[ADDPDrm:%[0-9]+]]:vr128 = fpexcept ADDPDrm [[MOVAPDrm]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load 16 from constant-pool) +; CHECK: [[ADDPDrm1:%[0-9]+]]:vr128 = fpexcept ADDPDrm [[MOVAPDrm]], $rip, 1, $noreg, %const.2, $noreg, implicit $mxcsr :: (load 16 from constant-pool) ; CHECK: $xmm0 = COPY [[ADDPDrm]] ; CHECK: $xmm1 = COPY [[ADDPDrm1]] ; CHECK: RET 0, $xmm0, $xmm1