Index: include/llvm/CodeGen/MachineMemOperand.h
===================================================================
--- include/llvm/CodeGen/MachineMemOperand.h
+++ include/llvm/CodeGen/MachineMemOperand.h
@@ -109,6 +109,9 @@
   /// Return a MachinePointerInfo record that refers to a GOT entry.
   static MachinePointerInfo getGOT(MachineFunction &MF);
 
+  /// Return a MachinePointerInfo record that refers to floating-point status.
+  static MachinePointerInfo getFPStatus(MachineFunction &MF);
+
   /// Stack pointer relative access.
   static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset,
                                      uint8_t ID = 0);
@@ -275,6 +278,13 @@
   /// operations.
   bool isUnordered() const { return !isVolatile(); }
 
+  /// Return true if this memory operand represents the floating-point status.
+  bool isFPStatus() const {
+    if (auto *PSV = getPseudoValue())
+      return PSV->isFPStatus();
+    return false;
+  }
+
   /// Update this MachineMemOperand to reflect the alignment of MMO, if it has a
   /// greater alignment. This must only be used when the new alignment applies
   /// to all users of this MachineMemOperand.
Index: include/llvm/CodeGen/PseudoSourceValue.h
===================================================================
--- include/llvm/CodeGen/PseudoSourceValue.h
+++ include/llvm/CodeGen/PseudoSourceValue.h
@@ -41,6 +41,7 @@
     GOT,
     JumpTable,
     ConstantPool,
+    FPStatus,
     FixedStack,
     GlobalValueCallEntry,
     ExternalSymbolCallEntry,
@@ -70,6 +71,7 @@
   bool isGOT() const { return Kind == GOT; }
   bool isConstantPool() const { return Kind == ConstantPool; }
   bool isJumpTable() const { return Kind == JumpTable; }
+  bool isFPStatus() const { return Kind == FPStatus; }
 
   unsigned getAddressSpace() const { return AddressSpace; }
 
@@ -156,7 +158,8 @@
 /// Manages creation of pseudo source values.
 class PseudoSourceValueManager {
   const TargetInstrInfo &TII;
-  const PseudoSourceValue StackPSV, GOTPSV, JumpTablePSV, ConstantPoolPSV;
+  const PseudoSourceValue StackPSV, GOTPSV, JumpTablePSV, ConstantPoolPSV,
+      FPStatusPSV;
   std::map<int, std::unique_ptr<FixedStackPseudoSourceValue>> FSValues;
   StringMap<std::unique_ptr<const ExternalSymbolPseudoSourceValue>>
       ExternalCallEntries;
@@ -184,6 +187,11 @@
   /// are constant, this doesn't need to identify a specific jump table.
   const PseudoSourceValue *getJumpTable();
 
+  /// Return a pseudo source value referencing target floating-point status
+  /// (e.g. exception status flags). This is represented as a "memory" object
+  /// even if a target may actually hold this status in a special register.
+  const PseudoSourceValue *getFPStatus();
+
   /// Return a pseudo source value referencing a fixed stack frame entry,
   /// e.g., a spill slot.
   const PseudoSourceValue *getFixedStack(int FI);
Index: include/llvm/CodeGen/SelectionDAGNodes.h
===================================================================
--- include/llvm/CodeGen/SelectionDAGNodes.h
+++ include/llvm/CodeGen/SelectionDAGNodes.h
@@ -637,7 +637,7 @@
   }
 
   /// Test if this node is a strict floating point pseudo-op.
-  bool isStrictFPOpcode() {
+  bool isStrictFPOpcode() const {
     switch (NodeType) {
       default:
         return false;
@@ -1344,6 +1344,7 @@
            N->getOpcode() == ISD::MGATHER ||
            N->getOpcode() == ISD::MSCATTER ||
            N->isMemIntrinsic() ||
+           N->isStrictFPOpcode() ||
            N->isTargetMemoryOpcode();
   }
 };
@@ -1411,6 +1412,7 @@
     // We lower some target intrinsics to their target opcode
     // early a node with a target opcode can be of this class
     return N->isMemIntrinsic() ||
+           N->isStrictFPOpcode() ||
            N->getOpcode() == ISD::PREFETCH ||
            N->isTargetMemoryOpcode();
   }
Index: include/llvm/Target/TargetSelectionDAG.td
===================================================================
--- include/llvm/Target/TargetSelectionDAG.td
+++ include/llvm/Target/TargetSelectionDAG.td
@@ -445,6 +445,37 @@
 def f16_to_fp  : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
 def fp_to_f16  : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;
 
+def strict_fadd       : SDNode<"ISD::STRICT_FADD",
+                               SDTFPBinOp, [SDNPHasChain, SDNPMemOperand,
+                                            SDNPCommutative]>;
+def strict_fsub       : SDNode<"ISD::STRICT_FSUB",
+                               SDTFPBinOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_fmul       : SDNode<"ISD::STRICT_FMUL",
+                               SDTFPBinOp, [SDNPHasChain, SDNPMemOperand,
+                                            SDNPCommutative]>;
+def strict_fdiv       : SDNode<"ISD::STRICT_FDIV",
+                               SDTFPBinOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_frem       : SDNode<"ISD::STRICT_FREM",
+                               SDTFPBinOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_fma        : SDNode<"ISD::STRICT_FMA",
+                               SDTFPTernaryOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_fsqrt      : SDNode<"ISD::STRICT_FSQRT",
+                               SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_fsin       : SDNode<"ISD::STRICT_FSIN",
+                               SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_fcos       : SDNode<"ISD::STRICT_FCOS",
+                               SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_fexp2      : SDNode<"ISD::STRICT_FEXP2",
+                               SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_fpow       : SDNode<"ISD::STRICT_FPOW",
+                               SDTFPBinOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_flog2      : SDNode<"ISD::STRICT_FLOG2",
+                               SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_frint      : SDNode<"ISD::STRICT_FRINT",
+                               SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_fnearbyint : SDNode<"ISD::STRICT_FNEARBYINT",
+                               SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>;
+
 def setcc      : SDNode<"ISD::SETCC"      , SDTSetCC>;
 def select     : SDNode<"ISD::SELECT"     , SDTSelect>;
 def vselect    : SDNode<"ISD::VSELECT"    , SDTVSelect>;
Index: lib/CodeGen/MIRParser/MILexer.h
===================================================================
--- lib/CodeGen/MIRParser/MILexer.h
+++ lib/CodeGen/MIRParser/MILexer.h
@@ -105,6 +105,7 @@
     kw_got,
     kw_jump_table,
     kw_constant_pool,
+    kw_fp_status,
     kw_call_entry,
     kw_liveout,
     kw_address_taken,
Index: lib/CodeGen/MIRParser/MILexer.cpp
===================================================================
--- lib/CodeGen/MIRParser/MILexer.cpp
+++ lib/CodeGen/MIRParser/MILexer.cpp
@@ -237,6 +237,7 @@
       .Case("got", MIToken::kw_got)
       .Case("jump-table", MIToken::kw_jump_table)
       .Case("constant-pool", MIToken::kw_constant_pool)
+      .Case("fp-status", MIToken::kw_fp_status)
       .Case("call-entry", MIToken::kw_call_entry)
       .Case("liveout", MIToken::kw_liveout)
       .Case("address-taken", MIToken::kw_address_taken)
Index: lib/CodeGen/MIRParser/MIParser.cpp
===================================================================
--- lib/CodeGen/MIRParser/MIParser.cpp
+++ lib/CodeGen/MIRParser/MIParser.cpp
@@ -2310,6 +2310,9 @@
   case MIToken::kw_constant_pool:
     PSV = MF.getPSVManager().getConstantPool();
     break;
+  case MIToken::kw_fp_status:
+    PSV = MF.getPSVManager().getFPStatus();
+    break;
   case MIToken::FixedStackObject: {
     int FI;
     if (parseFixedStackFrameIndex(FI))
Index: lib/CodeGen/MachineOperand.cpp
===================================================================
--- lib/CodeGen/MachineOperand.cpp
+++ lib/CodeGen/MachineOperand.cpp
@@ -967,6 +967,10 @@
   return MachinePointerInfo(MF.getPSVManager().getGOT());
 }
 
+MachinePointerInfo MachinePointerInfo::getFPStatus(MachineFunction &MF) {
+  return MachinePointerInfo(MF.getPSVManager().getFPStatus());
+}
+
 MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF,
                                                 int64_t Offset, uint8_t ID) {
   return MachinePointerInfo(MF.getPSVManager().getStack(), Offset, ID);
@@ -1098,6 +1102,9 @@
     case PseudoSourceValue::ConstantPool:
       OS << "constant-pool";
       break;
+    case PseudoSourceValue::FPStatus:
+      OS << "fp-status";
+      break;
     case PseudoSourceValue::FixedStack: {
      int FrameIndex = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex();
      bool IsFixed = true;
Index: lib/CodeGen/PseudoSourceValue.cpp
===================================================================
--- lib/CodeGen/PseudoSourceValue.cpp
+++ lib/CodeGen/PseudoSourceValue.cpp
@@ -22,7 +22,7 @@
 using namespace llvm;
 
 static const char *const PSVNames[] = {
-    "Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack",
+    "Stack", "GOT", "JumpTable", "ConstantPool", "FPStatus", "FixedStack",
     "GlobalValueCallEntry", "ExternalSymbolCallEntry"};
 
 PseudoSourceValue::PseudoSourceValue(PSVKind Kind, const TargetInstrInfo &TII)
@@ -41,7 +41,7 @@
 }
 
 bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
-  if (isStack())
+  if (isStack() || isFPStatus())
     return false;
   if (isGOT() || isConstantPool() || isJumpTable())
     return true;
@@ -49,7 +49,7 @@
 }
 
 bool PseudoSourceValue::isAliased(const MachineFrameInfo *) const {
-  if (isStack() || isGOT() || isConstantPool() || isJumpTable())
+  if (isStack() || isGOT() || isConstantPool() || isJumpTable() || isFPStatus())
     return false;
   llvm_unreachable("Unknown PseudoSourceValue!");
 }
@@ -110,7 +110,8 @@
       StackPSV(PseudoSourceValue::Stack, TII),
       GOTPSV(PseudoSourceValue::GOT, TII),
       JumpTablePSV(PseudoSourceValue::JumpTable, TII),
-      ConstantPoolPSV(PseudoSourceValue::ConstantPool, TII) {}
+      ConstantPoolPSV(PseudoSourceValue::ConstantPool, TII),
+      FPStatusPSV(PseudoSourceValue::FPStatus, TII) {}
 
 const PseudoSourceValue *PseudoSourceValueManager::getStack() {
   return &StackPSV;
@@ -126,6 +127,10 @@
   return &JumpTablePSV;
 }
 
+const PseudoSourceValue *PseudoSourceValueManager::getFPStatus() {
+  return &FPStatusPSV;
+}
+
 const PseudoSourceValue *
 PseudoSourceValueManager::getFixedStack(int FI) {
   std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI];
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6040,6 +6040,24 @@
           Opcode == ISD::PREFETCH ||
           Opcode == ISD::LIFETIME_START ||
           Opcode == ISD::LIFETIME_END ||
+          Opcode == ISD::STRICT_FADD ||
+          Opcode == ISD::STRICT_FSUB ||
+          Opcode == ISD::STRICT_FMUL ||
+          Opcode == ISD::STRICT_FDIV ||
+          Opcode == ISD::STRICT_FREM ||
+          Opcode == ISD::STRICT_FMA ||
+          Opcode == ISD::STRICT_FSQRT ||
+          Opcode == ISD::STRICT_FPOW ||
+          Opcode == ISD::STRICT_FPOWI ||
+          Opcode == ISD::STRICT_FSIN ||
+          Opcode == ISD::STRICT_FCOS ||
+          Opcode == ISD::STRICT_FEXP ||
+          Opcode == ISD::STRICT_FEXP2 ||
+          Opcode == ISD::STRICT_FLOG ||
+          Opcode == ISD::STRICT_FLOG10 ||
+          Opcode == ISD::STRICT_FLOG2 ||
+          Opcode == ISD::STRICT_FRINT ||
+          Opcode == ISD::STRICT_FNEARBYINT ||
          ((int)Opcode <= std::numeric_limits<int>::max() &&
           (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
         "Opcode is not a memory-accessing opcode!");
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6167,6 +6167,7 @@
 
 void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
     const ConstrainedFPIntrinsic &FPI) {
+  MachineFunction &MF = DAG.getMachineFunction();
   SDLoc sdl = getCurSDLoc();
   unsigned Opcode;
   switch (FPI.getIntrinsicID()) {
@@ -6233,19 +6234,27 @@
   ValueVTs.push_back(MVT::Other); // Out chain
 
   SDVTList VTs = DAG.getVTList(ValueVTs);
+  EVT MemVT = EVT::getIntegerVT(*Context, 8);
+  MachinePointerInfo MPInfo = MachinePointerInfo::getFPStatus(MF);
   SDValue Result;
   if (FPI.isUnaryOp())
-    Result = DAG.getNode(Opcode, sdl, VTs,
-                         { Chain, getValue(FPI.getArgOperand(0)) });
+    Result = DAG.getMemIntrinsicNode(Opcode, sdl, VTs,
+                                     { Chain,
+                                       getValue(FPI.getArgOperand(0)) },
+                                     MemVT, MPInfo);
   else if (FPI.isTernaryOp())
-    Result = DAG.getNode(Opcode, sdl, VTs,
-                         { Chain, getValue(FPI.getArgOperand(0)),
-                           getValue(FPI.getArgOperand(1)),
-                           getValue(FPI.getArgOperand(2)) });
+    Result = DAG.getMemIntrinsicNode(Opcode, sdl, VTs,
+                                     { Chain,
+                                       getValue(FPI.getArgOperand(0)),
+                                       getValue(FPI.getArgOperand(1)),
+                                       getValue(FPI.getArgOperand(2)) },
+                                     MemVT, MPInfo);
   else
-    Result = DAG.getNode(Opcode, sdl, VTs,
-                         { Chain, getValue(FPI.getArgOperand(0)),
-                           getValue(FPI.getArgOperand(1)) });
+    Result = DAG.getMemIntrinsicNode(Opcode, sdl, VTs,
+                                     { Chain,
+                                       getValue(FPI.getArgOperand(0)),
+                                       getValue(FPI.getArgOperand(1)) },
+                                     MemVT, MPInfo);
 
   assert(Result.getNode()->getNumValues() == 2);
   SDValue OutChain = Result.getValue(1);
Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1084,7 +1084,9 @@
   // node should be mutated.
   //
   // FIXME: The backends need a way to handle FP constraints.
-  if (Node->isStrictFPOpcode())
+  if (Node->isStrictFPOpcode() &&
+      (TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0))
+       != TargetLowering::Legal))
     Node = CurDAG->mutateStrictFPToFP(Node);
 
   DEBUG(dbgs() << "\nISEL: Starting selection on root node: ";
@@ -3590,7 +3592,8 @@
     // loads.
     if (EmitNodeInfo & OPFL_MemRefs) {
       // Only attach load or store memory operands if the generated
-      // instruction may load or store.
+      // instruction may load or store. As an exception to this,
+      // always attach floating-point status operands.
       const MCInstrDesc &MCID = TII->get(TargetOpc);
       bool mayLoad = MCID.mayLoad();
       bool mayStore = MCID.mayStore();
@@ -3598,7 +3601,9 @@
       unsigned NumMemRefs = 0;
       for (SmallVectorImpl<MachineMemOperand *>::const_iterator I =
            MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
-        if ((*I)->isLoad()) {
+        if ((*I)->isFPStatus()) {
+          ++NumMemRefs;
+        } else if ((*I)->isLoad()) {
           if (mayLoad)
             ++NumMemRefs;
         } else if ((*I)->isStore()) {
@@ -3615,7 +3620,9 @@
       MachineSDNode::mmo_iterator MemRefsPos = MemRefs;
       for (SmallVectorImpl<MachineMemOperand *>::const_iterator I =
            MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
-        if ((*I)->isLoad()) {
+        if ((*I)->isFPStatus()) {
+          *MemRefsPos++ = *I;
+        } else if ((*I)->isLoad()) {
           if (mayLoad)
             *MemRefsPos++ = *I;
         } else if ((*I)->isStore()) {
Index: lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- lib/CodeGen/TargetLoweringBase.cpp
+++ lib/CodeGen/TargetLoweringBase.cpp
@@ -638,6 +638,26 @@
       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
     }
 
+    // Constrained floating-point operations default to expand.
+    setOperationAction(ISD::STRICT_FADD, VT, Expand);
+    setOperationAction(ISD::STRICT_FSUB, VT, Expand);
+    setOperationAction(ISD::STRICT_FMUL, VT, Expand);
+    setOperationAction(ISD::STRICT_FDIV, VT, Expand);
+    setOperationAction(ISD::STRICT_FREM, VT, Expand);
+    setOperationAction(ISD::STRICT_FMA, VT, Expand);
+    setOperationAction(ISD::STRICT_FSQRT, VT, Expand);
+    setOperationAction(ISD::STRICT_FPOW, VT, Expand);
+    setOperationAction(ISD::STRICT_FPOWI, VT, Expand);
+    setOperationAction(ISD::STRICT_FSIN, VT, Expand);
+    setOperationAction(ISD::STRICT_FCOS, VT, Expand);
+    setOperationAction(ISD::STRICT_FEXP, VT, Expand);
+    setOperationAction(ISD::STRICT_FEXP2, VT, Expand);
+    setOperationAction(ISD::STRICT_FLOG, VT, Expand);
+    setOperationAction(ISD::STRICT_FLOG10, VT, Expand);
+    setOperationAction(ISD::STRICT_FLOG2, VT, Expand);
+    setOperationAction(ISD::STRICT_FRINT, VT, Expand);
+    setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand);
+
     // For most targets @llvm.get.dynamic.area.offset just returns 0.
     setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
   }
Index: lib/Target/AMDGPU/R600InstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/R600InstrInfo.cpp
+++ lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -1502,6 +1502,7 @@
   case PseudoSourceValue::ConstantPool:
   case PseudoSourceValue::GOT:
   case PseudoSourceValue::JumpTable:
+  case PseudoSourceValue::FPStatus:
   case PseudoSourceValue::GlobalValueCallEntry:
   case PseudoSourceValue::ExternalSymbolCallEntry:
   case PseudoSourceValue::TargetCustom:
Index: lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.cpp
+++ lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -401,6 +401,18 @@
       setOperationAction(ISD::FSINCOS, VT, Expand);
       setOperationAction(ISD::FREM, VT, Expand);
       setOperationAction(ISD::FPOW, VT, Expand);
+
+      // Handle constrained floating-point operations.
+      setOperationAction(ISD::STRICT_FADD, VT, Legal);
+      setOperationAction(ISD::STRICT_FSUB, VT, Legal);
+      setOperationAction(ISD::STRICT_FMUL, VT, Legal);
+      setOperationAction(ISD::STRICT_FDIV, VT, Legal);
+      setOperationAction(ISD::STRICT_FMA, VT, Legal);
+      setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
+      setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+      if (Subtarget.hasFPExtension()) {
+        setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
+      }
     }
   }
 
Index: lib/Target/SystemZ/SystemZInstrFP.td
===================================================================
--- lib/Target/SystemZ/SystemZInstrFP.td
+++ lib/Target/SystemZ/SystemZInstrFP.td
@@ -359,6 +359,14 @@
 
 def SQEB : UnaryRXE<"sqeb", 0xED14, loadu, FP32, 4>;
 def SQDB : UnaryRXE<"sqdb", 0xED15, loadu, FP64, 8>;
+
+def : Pat<(strict_fsqrt FP32:$src),  (SQEBR FP32:$src)>;
+def : Pat<(strict_fsqrt FP64:$src),  (SQDBR FP64:$src)>;
+def : Pat<(strict_fsqrt FP128:$src), (SQXBR FP128:$src)>;
+def : Pat<(strict_fsqrt (f32 (load bdxaddr12only:$addr))),
+          (SQEB bdxaddr12only:$addr)>;
+def : Pat<(strict_fsqrt (f64 (load bdxaddr12only:$addr))),
+          (SQDB bdxaddr12only:$addr)>;
 
 // Round to an integer, with the second operand (modifier M3) specifying
 // the rounding mode.  These forms always check for inexact conditions.
@@ -371,6 +379,9 @@
 def : Pat<(frint FP32:$src),  (FIEBR 0, FP32:$src)>;
 def : Pat<(frint FP64:$src),  (FIDBR 0, FP64:$src)>;
 def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>;
+def : Pat<(strict_frint FP32:$src),  (FIEBR 0, FP32:$src)>;
+def : Pat<(strict_frint FP64:$src),  (FIDBR 0, FP64:$src)>;
+def : Pat<(strict_frint FP128:$src), (FIXBR 0, FP128:$src)>;
 
 let Predicates = [FeatureFPExtension] in {
   // Extended forms of the FIxBR instructions.  M4 can be set to 4
@@ -383,6 +394,9 @@
   def : Pat<(fnearbyint FP32:$src),  (FIEBRA 0, FP32:$src,  4)>;
   def : Pat<(fnearbyint FP64:$src),  (FIDBRA 0, FP64:$src,  4)>;
   def : Pat<(fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>;
+  def : Pat<(strict_fnearbyint FP32:$src),  (FIEBRA 0, FP32:$src,  4)>;
+  def : Pat<(strict_fnearbyint FP64:$src),  (FIDBRA 0, FP64:$src,  4)>;
+  def : Pat<(strict_fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>;
 
   // floor is no longer allowed to raise an inexact condition,
   // so restrict it to the cases where the condition can be suppressed.
@@ -422,6 +436,16 @@
   def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load, 4>;
   def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load, 8>;
 }
+def : Pat<(strict_fadd FP32:$src1, FP32:$src2),
+          (AEBR FP32:$src1, FP32:$src2)>;
+def : Pat<(strict_fadd FP64:$src1, FP64:$src2),
+          (ADBR FP64:$src1, FP64:$src2)>;
+def : Pat<(strict_fadd FP128:$src1, FP128:$src2),
+          (AXBR FP128:$src1, FP128:$src2)>;
+def : Pat<(strict_fadd FP32:$src1, (load bdxaddr12only:$addr)),
+          (AEB FP32:$src1, bdxaddr12only:$addr)>;
+def : Pat<(strict_fadd FP64:$src1, (load bdxaddr12only:$addr)),
+          (ADB FP64:$src1, bdxaddr12only:$addr)>;
 
 // Subtraction.
 let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
@@ -432,6 +456,16 @@
   def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load, 4>;
   def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load, 8>;
 }
+def : Pat<(strict_fsub FP32:$src1, FP32:$src2),
+          (SEBR FP32:$src1, FP32:$src2)>;
+def : Pat<(strict_fsub FP64:$src1, FP64:$src2),
+          (SDBR FP64:$src1, FP64:$src2)>;
+def : Pat<(strict_fsub FP128:$src1, FP128:$src2),
+          (SXBR FP128:$src1, FP128:$src2)>;
+def : Pat<(strict_fsub FP32:$src1, (load bdxaddr12only:$addr)),
+          (SEB FP32:$src1, bdxaddr12only:$addr)>;
+def : Pat<(strict_fsub FP64:$src1, (load bdxaddr12only:$addr)),
+          (SDB FP64:$src1, bdxaddr12only:$addr)>;
 
 // Multiplication.
 let isCommutable = 1 in {
@@ -441,12 +475,27 @@
 }
 def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load, 4>;
 def MDB  : BinaryRXE<"mdb",  0xED1C, fmul, FP64, load, 8>;
+
+def : Pat<(strict_fmul FP32:$src1, FP32:$src2),
+          (MEEBR FP32:$src1, FP32:$src2)>;
+def : Pat<(strict_fmul FP64:$src1, FP64:$src2),
+          (MDBR FP64:$src1, FP64:$src2)>;
+def : Pat<(strict_fmul FP128:$src1, FP128:$src2),
+          (MXBR FP128:$src1, FP128:$src2)>;
+def : Pat<(strict_fmul FP32:$src1, (load bdxaddr12only:$addr)),
+          (MEEB FP32:$src1, bdxaddr12only:$addr)>;
+def : Pat<(strict_fmul FP64:$src1, (load bdxaddr12only:$addr)),
+          (MDB FP64:$src1, bdxaddr12only:$addr)>;
 
 // f64 multiplication of two FP32 registers.
 def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
 def : Pat<(fmul (f64 (fpextend FP32:$src1)), (f64 (fpextend FP32:$src2))),
           (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                 FP32:$src1, subreg_r32), FP32:$src2)>;
+def : Pat<(strict_fmul (f64 (fpextend FP32:$src1)),
+                       (f64 (fpextend FP32:$src2))),
+          (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+                                FP32:$src1, subreg_r32), FP32:$src2)>;
 
 // f64 multiplication of an FP32 register and an f32 memory.
 def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
@@ -454,21 +503,35 @@
                 (f64 (extloadf32 bdxaddr12only:$addr))),
           (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_r32),
                 bdxaddr12only:$addr)>;
+def : Pat<(strict_fmul (f64 (fpextend FP32:$src1)),
+                       (f64 (extloadf32 bdxaddr12only:$addr))),
+          (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_r32),
+                bdxaddr12only:$addr)>;
 
 // f128 multiplication of two FP64 registers.
 def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
-let Predicates = [FeatureNoVectorEnhancements1] in
+let Predicates = [FeatureNoVectorEnhancements1] in {
   def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))),
             (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
                                   FP64:$src1, subreg_h64), FP64:$src2)>;
+  def : Pat<(strict_fmul (f128 (fpextend FP64:$src1)),
+                         (f128 (fpextend FP64:$src2))),
+            (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
+                                  FP64:$src1, subreg_h64), FP64:$src2)>;
+}
 
 // f128 multiplication of an FP64 register and an f64 memory.
 def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
-let Predicates = [FeatureNoVectorEnhancements1] in
+let Predicates = [FeatureNoVectorEnhancements1] in {
   def : Pat<(fmul (f128 (fpextend FP64:$src1)),
                   (f128 (extloadf64 bdxaddr12only:$addr))),
             (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
                   bdxaddr12only:$addr)>;
+  def : Pat<(strict_fmul (f128 (fpextend FP64:$src1)),
+                         (f128 (extloadf64 bdxaddr12only:$addr))),
+            (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
+                  bdxaddr12only:$addr)>;
+}
 
 // Fused multiply-add.
 def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>;
@@ -476,6 +539,15 @@
 
 def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, FP32, load, 4>;
 def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, FP64, load, 8>;
+
+def : Pat<(z_strict_fma FP32:$src1, FP32:$src2, FP32:$src3),
+          (MAEBR FP32:$src1, FP32:$src2, FP32:$src3)>;
+def : Pat<(z_strict_fma FP64:$src1, FP64:$src2, FP64:$src3),
+          (MADBR FP64:$src1, FP64:$src2, FP64:$src3)>;
+def : Pat<(z_strict_fma FP32:$src1, FP32:$src2, (load bdxaddr12only:$addr)),
+          (MAEB FP32:$src1, FP32:$src2, bdxaddr12only:$addr)>;
+def : Pat<(z_strict_fma FP64:$src1, FP64:$src2, (load bdxaddr12only:$addr)),
+          (MADB FP64:$src1, FP64:$src2, bdxaddr12only:$addr)>;
 
 // Fused multiply-subtract.
 def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32, FP32>;
@@ -483,6 +555,15 @@
 
 def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, FP32, load, 4>;
 def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, FP64, load, 8>;
+
+def : Pat<(z_strict_fms FP32:$src1, FP32:$src2, FP32:$src3),
+          (MSEBR FP32:$src1, FP32:$src2, FP32:$src3)>;
+def : Pat<(z_strict_fms FP64:$src1, FP64:$src2, FP64:$src3),
+          (MSDBR FP64:$src1, FP64:$src2, FP64:$src3)>;
+def : Pat<(z_strict_fms FP32:$src1, FP32:$src2, (load bdxaddr12only:$addr)),
+          (MSEB FP32:$src1, FP32:$src2, bdxaddr12only:$addr)>;
+def : Pat<(z_strict_fms FP64:$src1, FP64:$src2, (load bdxaddr12only:$addr)),
+          (MSDB FP64:$src1, FP64:$src2, bdxaddr12only:$addr)>;
 
 // Division.
 def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>;
@@ -491,6 +572,17 @@
 
 def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load, 4>;
 def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>;
+
+def : Pat<(strict_fdiv FP32:$src1, FP32:$src2),
+          (DEBR FP32:$src1, FP32:$src2)>;
+def : Pat<(strict_fdiv FP64:$src1, FP64:$src2),
+          (DDBR FP64:$src1, FP64:$src2)>;
+def : Pat<(strict_fdiv FP128:$src1, FP128:$src2),
+          (DXBR FP128:$src1, FP128:$src2)>;
+def : Pat<(strict_fdiv FP32:$src1, (load bdxaddr12only:$addr)),
+          (DEB FP32:$src1, bdxaddr12only:$addr)>;
+def : Pat<(strict_fdiv FP64:$src1, (load bdxaddr12only:$addr)),
+          (DDB FP64:$src1, bdxaddr12only:$addr)>;
 
 // Divide to integer.
 let Defs = [CC] in {
Index: lib/Target/SystemZ/SystemZOperators.td
===================================================================
--- lib/Target/SystemZ/SystemZOperators.td
+++ lib/Target/SystemZ/SystemZOperators.td
@@ -656,6 +656,10 @@
                                 (fma node:$src2, node:$src3, node:$src1)>;
 def z_fms            : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                 (fma node:$src2, node:$src3, (fneg node:$src1))>;
+def z_strict_fma     : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                                (strict_fma node:$src2, node:$src3, node:$src1)>;
+def z_strict_fms     : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                                (strict_fma node:$src2, node:$src3, (fneg node:$src1))>;
 
 // Negative fused multiply-add and multiply-subtract.
 def fnma             : PatFrag<(ops node:$src1, node:$src2, node:$src3),
Index: test/CodeGen/SystemZ/fp-strict-add-01.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-add-01.ll
+++ test/CodeGen/SystemZ/fp-strict-add-01.ll
@@ -0,0 +1,173 @@
+; Test 32-bit floating-point strict addition.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @foo()
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+
+; Check register addition.
+define float @f1(float %f1, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK: aebr %f0, %f2
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the low end of the AEB range.
+define float @f2(float %f1, float *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: aeb %f0, 0(%r2)
+; CHECK: br %r14
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the high end of the aligned AEB range.
+define float @f3(float %f1, float *%base) {
+; CHECK-LABEL: f3:
+; CHECK: aeb %f0, 4092(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1023
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f4(float %f1, float *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: aeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1024
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define float @f5(float %f1, float *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -4
+; CHECK: aeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 -1
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check that AEB allows indices.
+define float @f6(float %f1, float *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: aeb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr float, float *%base, i64 %index
+  %ptr2 = getelementptr float, float *%ptr1, i64 100
+  %f2 = load float, float *%ptr2
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check that additions of spilled values can use AEB rather than AEBR.
+define float @f7(float *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: aeb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr float, float *%ptr0, i64 2
+  %ptr2 = getelementptr float, float *%ptr0, i64 4
+  %ptr3 = getelementptr float, float *%ptr0, i64 6
+  %ptr4 = getelementptr float, float *%ptr0, i64 8
+  %ptr5 = getelementptr float, float *%ptr0, i64 10
+  %ptr6 = getelementptr float, float *%ptr0, i64 12
+  %ptr7 = getelementptr float, float *%ptr0, i64 14
+  %ptr8 = getelementptr float, float *%ptr0, i64 16
+  %ptr9 = getelementptr float, float *%ptr0, i64 18
+  %ptr10 = getelementptr float, float *%ptr0, i64 20
+
+  %val0 = load float, float *%ptr0
+  %val1 = load float, float *%ptr1
+  %val2 = load float, float *%ptr2
+  %val3 = load float, float *%ptr3
+  %val4 = load float, float *%ptr4
+  %val5 = load float, float *%ptr5
+  %val6 = load float, float *%ptr6
+  %val7 = load float, float *%ptr7
+  %val8 = load float, float *%ptr8
+  %val9 = load float, float *%ptr9
+  %val10 = load float, float *%ptr10
+
+  %ret = call float @foo()
+
+  %add0 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %ret, float %val0,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add1 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add0, float %val1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add2 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add1, float %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add3 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add2, float %val3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add4 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add3, float %val4,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add5 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add4, float %val5,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add6 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add5, float %val6,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add7 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add6, float %val7,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add8 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add7, float %val8,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add9 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add8, float %val9,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add10 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add9, float %val10,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+
+  ret float %add10
+}