Index: include/llvm/CodeGen/MachineInstr.h =================================================================== --- include/llvm/CodeGen/MachineInstr.h +++ include/llvm/CodeGen/MachineInstr.h @@ -22,6 +22,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/InlineAsm.h" @@ -656,12 +657,19 @@ /// Return true if this instruction could possibly read memory. /// Instructions with this flag set are not necessarily simple load /// instructions, they may load a value and modify it, for example. + /// Also returns true if an instruction is marked mayAccessMemory + /// *and* carries a memory operand representing a load. bool mayLoad(QueryType Type = AnyInBundle) const { if (isInlineAsm()) { unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); if (ExtraInfo & InlineAsm::Extra_MayLoad) return true; } + if (hasProperty(MCID::MayAccessMemory, Type)) { + for (const auto *MMO : memoperands()) + if (MMO->isLoad()) + return true; + } return hasProperty(MCID::MayLoad, Type); } @@ -669,12 +677,19 @@ /// Instructions with this flag set are not necessarily simple store /// instructions, they may store a modified value based on their operands, or /// may not actually modify anything, for example. + /// Also returns true if an instruction is marked mayAccessMemory + /// *and* carries a memory operand representing a store. 
bool mayStore(QueryType Type = AnyInBundle) const { if (isInlineAsm()) { unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); if (ExtraInfo & InlineAsm::Extra_MayStore) return true; } + if (hasProperty(MCID::MayAccessMemory, Type)) { + for (const auto *MMO : memoperands()) + if (MMO->isStore()) + return true; + } return hasProperty(MCID::MayStore, Type); } Index: include/llvm/CodeGen/MachineMemOperand.h =================================================================== --- include/llvm/CodeGen/MachineMemOperand.h +++ include/llvm/CodeGen/MachineMemOperand.h @@ -109,6 +109,9 @@ /// Return a MachinePointerInfo record that refers to a GOT entry. static MachinePointerInfo getGOT(MachineFunction &MF); + /// Return a MachinePointerInfo record that refers to floating-point status. + static MachinePointerInfo getFPStatus(MachineFunction &MF); + /// Stack pointer relative access. static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID = 0); Index: include/llvm/CodeGen/PseudoSourceValue.h =================================================================== --- include/llvm/CodeGen/PseudoSourceValue.h +++ include/llvm/CodeGen/PseudoSourceValue.h @@ -41,6 +41,7 @@ GOT, JumpTable, ConstantPool, + FPStatus, FixedStack, GlobalValueCallEntry, ExternalSymbolCallEntry, @@ -70,6 +71,7 @@ bool isGOT() const { return Kind == GOT; } bool isConstantPool() const { return Kind == ConstantPool; } bool isJumpTable() const { return Kind == JumpTable; } + bool isFPStatus() const { return Kind == FPStatus; } unsigned getAddressSpace() const { return AddressSpace; } @@ -156,7 +158,8 @@ /// Manages creation of pseudo source values. 
class PseudoSourceValueManager { const TargetInstrInfo &TII; - const PseudoSourceValue StackPSV, GOTPSV, JumpTablePSV, ConstantPoolPSV; + const PseudoSourceValue StackPSV, GOTPSV, JumpTablePSV, ConstantPoolPSV, + FPStatusPSV; std::map<int, std::unique_ptr<FixedStackPseudoSourceValue>> FSValues; StringMap<std::unique_ptr<const ExternalSymbolPseudoSourceValue>> ExternalCallEntries; @@ -184,6 +187,11 @@ /// are constant, this doesn't need to identify a specific jump table. const PseudoSourceValue *getJumpTable(); + /// Return a pseudo source value referencing target floating-point status + /// (e.g. exception status flags). This is represented as a "memory" object + /// even if a target may actually hold this status in a special register. + const PseudoSourceValue *getFPStatus(); + /// Return a pseudo source value referencing a fixed stack frame entry, /// e.g., a spill slot. const PseudoSourceValue *getFixedStack(int FI); Index: include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- include/llvm/CodeGen/SelectionDAGNodes.h +++ include/llvm/CodeGen/SelectionDAGNodes.h @@ -650,7 +650,7 @@ } /// Test if this node is a strict floating point pseudo-op.
- bool isStrictFPOpcode() { + bool isStrictFPOpcode() const { switch (NodeType) { default: return false; @@ -1357,6 +1357,7 @@ N->getOpcode() == ISD::MGATHER || N->getOpcode() == ISD::MSCATTER || N->isMemIntrinsic() || + N->isStrictFPOpcode() || N->isTargetMemoryOpcode(); } }; @@ -1424,6 +1425,7 @@ // We lower some target intrinsics to their target opcode // early a node with a target opcode can be of this class return N->isMemIntrinsic() || + N->isStrictFPOpcode() || N->getOpcode() == ISD::PREFETCH || N->isTargetMemoryOpcode(); } Index: include/llvm/MC/MCInstrDesc.h =================================================================== --- include/llvm/MC/MCInstrDesc.h +++ include/llvm/MC/MCInstrDesc.h @@ -134,6 +134,7 @@ FoldableAsLoad, MayLoad, MayStore, + MayAccessMemory, Predicable, NotDuplicable, UnmodeledSideEffects, @@ -393,6 +394,13 @@ /// may not actually modify anything, for example. bool mayStore() const { return Flags & (1ULL << MCID::MayStore); } + /// Return true if this instruction may optionally access memory. + /// Such instructions are only considered to actually access memory + /// if they carry MachineMemOperand entries. + bool mayAccessMemory() const { + return Flags & (1ULL << MCID::MayAccessMemory); + } + /// Return true if this instruction has side /// effects that are not modeled by other flags. This does not return true /// for instructions whose effects are captured by: Index: include/llvm/Target/Target.td =================================================================== --- include/llvm/Target/Target.td +++ include/llvm/Target/Target.td @@ -451,6 +451,7 @@ bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand? bit mayLoad = ?; // Is it possible for this inst to read memory? bit mayStore = ?; // Is it possible for this inst to write memory? + bit mayAccessMemory = 0; // Can this inst have optional memory operands? bit isConvertibleToThreeAddress = 0; // Can this 2-addr instruction promote? 
bit isCommutable = 0; // Is this 3 operand instruction commutable? bit isTerminator = 0; // Is this part of the terminator for a basic block? Index: include/llvm/Target/TargetSelectionDAG.td =================================================================== --- include/llvm/Target/TargetSelectionDAG.td +++ include/llvm/Target/TargetSelectionDAG.td @@ -445,6 +445,37 @@ def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>; def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>; +def strict_fadd : SDNode<"ISD::STRICT_FADD", + SDTFPBinOp, [SDNPHasChain, SDNPMemOperand, + SDNPCommutative]>; +def strict_fsub : SDNode<"ISD::STRICT_FSUB", + SDTFPBinOp, [SDNPHasChain, SDNPMemOperand]>; +def strict_fmul : SDNode<"ISD::STRICT_FMUL", + SDTFPBinOp, [SDNPHasChain, SDNPMemOperand, + SDNPCommutative]>; +def strict_fdiv : SDNode<"ISD::STRICT_FDIV", + SDTFPBinOp, [SDNPHasChain, SDNPMemOperand]>; +def strict_frem : SDNode<"ISD::STRICT_FREM", + SDTFPBinOp, [SDNPHasChain, SDNPMemOperand]>; +def strict_fma : SDNode<"ISD::STRICT_FMA", + SDTFPTernaryOp, [SDNPHasChain, SDNPMemOperand]>; +def strict_fsqrt : SDNode<"ISD::STRICT_FSQRT", + SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>; +def strict_fsin : SDNode<"ISD::STRICT_FSIN", + SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>; +def strict_fcos : SDNode<"ISD::STRICT_FCOS", + SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>; +def strict_fexp2 : SDNode<"ISD::STRICT_FEXP2", + SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>; +def strict_fpow : SDNode<"ISD::STRICT_FPOW", + SDTFPBinOp, [SDNPHasChain, SDNPMemOperand]>; +def strict_flog2 : SDNode<"ISD::STRICT_FLOG2", + SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>; +def strict_frint : SDNode<"ISD::STRICT_FRINT", + SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>; +def strict_fnearbyint : SDNode<"ISD::STRICT_FNEARBYINT", + SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>; + def setcc : SDNode<"ISD::SETCC" , SDTSetCC>; def select : SDNode<"ISD::SELECT" , SDTSelect>; def vselect : SDNode<"ISD::VSELECT" , 
SDTVSelect>; @@ -1131,6 +1162,52 @@ def setne : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETNE)>; +// Convenience fragments to match both strict and non-strict fp operations +def any_fadd : PatFrag<(ops node:$lhs, node:$rhs), + (alternative (strict_fadd node:$lhs, node:$rhs), + (fadd node:$lhs, node:$rhs))>; +def any_fsub : PatFrag<(ops node:$lhs, node:$rhs), + (alternative (strict_fsub node:$lhs, node:$rhs), + (fsub node:$lhs, node:$rhs))>; +def any_fmul : PatFrag<(ops node:$lhs, node:$rhs), + (alternative (strict_fmul node:$lhs, node:$rhs), + (fmul node:$lhs, node:$rhs))>; +def any_fdiv : PatFrag<(ops node:$lhs, node:$rhs), + (alternative (strict_fdiv node:$lhs, node:$rhs), + (fdiv node:$lhs, node:$rhs))>; +def any_frem : PatFrag<(ops node:$lhs, node:$rhs), + (alternative (strict_frem node:$lhs, node:$rhs), + (frem node:$lhs, node:$rhs))>; +def any_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (alternative (strict_fma node:$src1, node:$src2, + node:$src3), + (fma node:$src1, node:$src2, + node:$src3))>; +def any_fsqrt : PatFrag<(ops node:$src), + (alternative (strict_fsqrt node:$src), + (fsqrt node:$src))>; +def any_fsin : PatFrag<(ops node:$src), + (alternative (strict_fsin node:$src), + (fsin node:$src))>; +def any_fcos : PatFrag<(ops node:$src), + (alternative (strict_fcos node:$src), + (fcos node:$src))>; +def any_fexp2 : PatFrag<(ops node:$src), + (alternative (strict_fexp2 node:$src), + (fexp2 node:$src))>; +def any_fpow : PatFrag<(ops node:$lhs, node:$rhs), + (alternative (strict_fpow node:$lhs, node:$rhs), + (fpow node:$lhs, node:$rhs))>; +def any_flog2 : PatFrag<(ops node:$src), + (alternative (strict_flog2 node:$src), + (flog2 node:$src))>; +def any_frint : PatFrag<(ops node:$src), + (alternative (strict_frint node:$src), + (frint node:$src))>; +def any_fnearbyint : PatFrag<(ops node:$src), + (alternative (strict_fnearbyint node:$src), + (fnearbyint node:$src))>; + multiclass binary_atomic_op_ord { def 
#NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val), (!cast<SDPatternOperator>(#NAME) node:$ptr, node:$val)> { Index: lib/CodeGen/MIRParser/MILexer.h =================================================================== --- lib/CodeGen/MIRParser/MILexer.h +++ lib/CodeGen/MIRParser/MILexer.h @@ -105,6 +105,7 @@ kw_got, kw_jump_table, kw_constant_pool, + kw_fp_status, kw_call_entry, kw_liveout, kw_address_taken, Index: lib/CodeGen/MIRParser/MILexer.cpp =================================================================== --- lib/CodeGen/MIRParser/MILexer.cpp +++ lib/CodeGen/MIRParser/MILexer.cpp @@ -237,6 +237,7 @@ .Case("got", MIToken::kw_got) .Case("jump-table", MIToken::kw_jump_table) .Case("constant-pool", MIToken::kw_constant_pool) + .Case("fp-status", MIToken::kw_fp_status) .Case("call-entry", MIToken::kw_call_entry) .Case("liveout", MIToken::kw_liveout) .Case("address-taken", MIToken::kw_address_taken) Index: lib/CodeGen/MIRParser/MIParser.cpp =================================================================== --- lib/CodeGen/MIRParser/MIParser.cpp +++ lib/CodeGen/MIRParser/MIParser.cpp @@ -2313,6 +2313,9 @@ case MIToken::kw_constant_pool: PSV = MF.getPSVManager().getConstantPool(); break; + case MIToken::kw_fp_status: + PSV = MF.getPSVManager().getFPStatus(); + break; case MIToken::FixedStackObject: { int FI; if (parseFixedStackFrameIndex(FI)) Index: lib/CodeGen/MachineOperand.cpp =================================================================== --- lib/CodeGen/MachineOperand.cpp +++ lib/CodeGen/MachineOperand.cpp @@ -967,6 +967,10 @@ return MachinePointerInfo(MF.getPSVManager().getGOT()); } +MachinePointerInfo MachinePointerInfo::getFPStatus(MachineFunction &MF) { + return MachinePointerInfo(MF.getPSVManager().getFPStatus()); +} + MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF, int64_t Offset, uint8_t ID) { return MachinePointerInfo(MF.getPSVManager().getStack(), Offset, ID); @@ -1098,6 +1102,9 @@ case PseudoSourceValue::ConstantPool: OS <<
"constant-pool"; break; + case PseudoSourceValue::FPStatus: + OS << "fp-status"; + break; case PseudoSourceValue::FixedStack: { int FrameIndex = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex(); bool IsFixed = true; Index: lib/CodeGen/PseudoSourceValue.cpp =================================================================== --- lib/CodeGen/PseudoSourceValue.cpp +++ lib/CodeGen/PseudoSourceValue.cpp @@ -22,7 +22,7 @@ using namespace llvm; static const char *const PSVNames[] = { - "Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack", + "Stack", "GOT", "JumpTable", "ConstantPool", "FPStatus", "FixedStack", "GlobalValueCallEntry", "ExternalSymbolCallEntry"}; PseudoSourceValue::PseudoSourceValue(PSVKind Kind, const TargetInstrInfo &TII) @@ -41,7 +41,7 @@ } bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const { - if (isStack()) + if (isStack() || isFPStatus()) return false; if (isGOT() || isConstantPool() || isJumpTable()) return true; @@ -49,7 +49,7 @@ } bool PseudoSourceValue::isAliased(const MachineFrameInfo *) const { - if (isStack() || isGOT() || isConstantPool() || isJumpTable()) + if (isStack() || isGOT() || isConstantPool() || isJumpTable() || isFPStatus()) return false; llvm_unreachable("Unknown PseudoSourceValue!"); } @@ -110,7 +110,8 @@ StackPSV(PseudoSourceValue::Stack, TII), GOTPSV(PseudoSourceValue::GOT, TII), JumpTablePSV(PseudoSourceValue::JumpTable, TII), - ConstantPoolPSV(PseudoSourceValue::ConstantPool, TII) {} + ConstantPoolPSV(PseudoSourceValue::ConstantPool, TII), + FPStatusPSV(PseudoSourceValue::FPStatus, TII) {} const PseudoSourceValue *PseudoSourceValueManager::getStack() { return &StackPSV; @@ -126,6 +127,10 @@ return &JumpTablePSV; } +const PseudoSourceValue *PseudoSourceValueManager::getFPStatus() { + return &FPStatusPSV; +} + const PseudoSourceValue * PseudoSourceValueManager::getFixedStack(int FI) { std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI]; Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
=================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6105,6 +6105,24 @@ Opcode == ISD::PREFETCH || Opcode == ISD::LIFETIME_START || Opcode == ISD::LIFETIME_END || + Opcode == ISD::STRICT_FADD || + Opcode == ISD::STRICT_FSUB || + Opcode == ISD::STRICT_FMUL || + Opcode == ISD::STRICT_FDIV || + Opcode == ISD::STRICT_FREM || + Opcode == ISD::STRICT_FMA || + Opcode == ISD::STRICT_FSQRT || + Opcode == ISD::STRICT_FPOW || + Opcode == ISD::STRICT_FPOWI || + Opcode == ISD::STRICT_FSIN || + Opcode == ISD::STRICT_FCOS || + Opcode == ISD::STRICT_FEXP || + Opcode == ISD::STRICT_FEXP2 || + Opcode == ISD::STRICT_FLOG || + Opcode == ISD::STRICT_FLOG10 || + Opcode == ISD::STRICT_FLOG2 || + Opcode == ISD::STRICT_FRINT || + Opcode == ISD::STRICT_FNEARBYINT || ((int)Opcode <= std::numeric_limits<int>::max() && (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && "Opcode is not a memory-accessing opcode!"); Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6186,6 +6186,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( const ConstrainedFPIntrinsic &FPI) { + MachineFunction &MF = DAG.getMachineFunction(); SDLoc sdl = getCurSDLoc(); unsigned Opcode; switch (FPI.getIntrinsicID()) { @@ -6252,19 +6253,27 @@ ValueVTs.push_back(MVT::Other); // Out chain SDVTList VTs = DAG.getVTList(ValueVTs); + EVT MemVT = EVT::getIntegerVT(*Context, 8); + MachinePointerInfo MPInfo = MachinePointerInfo::getFPStatus(MF); SDValue Result; if (FPI.isUnaryOp()) - Result = DAG.getNode(Opcode, sdl, VTs, - { Chain, getValue(FPI.getArgOperand(0)) }); + Result = DAG.getMemIntrinsicNode(Opcode, sdl, VTs, + { Chain, + getValue(FPI.getArgOperand(0)) }, + MemVT, MPInfo); else if (FPI.isTernaryOp()) - Result = DAG.getNode(Opcode,
sdl, VTs, - { Chain, getValue(FPI.getArgOperand(0)), - getValue(FPI.getArgOperand(1)), - getValue(FPI.getArgOperand(2)) }); + Result = DAG.getMemIntrinsicNode(Opcode, sdl, VTs, + { Chain, + getValue(FPI.getArgOperand(0)), + getValue(FPI.getArgOperand(1)), + getValue(FPI.getArgOperand(2)) }, + MemVT, MPInfo); else - Result = DAG.getNode(Opcode, sdl, VTs, - { Chain, getValue(FPI.getArgOperand(0)), - getValue(FPI.getArgOperand(1)) }); + Result = DAG.getMemIntrinsicNode(Opcode, sdl, VTs, + { Chain, + getValue(FPI.getArgOperand(0)), + getValue(FPI.getArgOperand(1)) }, + MemVT, MPInfo); assert(Result.getNode()->getNumValues() == 2); SDValue OutChain = Result.getValue(1); Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1086,7 +1086,9 @@ // node should be mutated. // // FIXME: The backends need a way to handle FP constraints. - if (Node->isStrictFPOpcode()) + if (Node->isStrictFPOpcode() && + (TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0)) + != TargetLowering::Legal)) Node = CurDAG->mutateStrictFPToFP(Node); LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: "; @@ -3593,8 +3595,8 @@ // Only attach load or store memory operands if the generated // instruction may load or store. 
const MCInstrDesc &MCID = TII->get(TargetOpc); - bool mayLoad = MCID.mayLoad(); - bool mayStore = MCID.mayStore(); + bool mayLoad = MCID.mayAccessMemory() || MCID.mayLoad(); + bool mayStore = MCID.mayAccessMemory() || MCID.mayStore(); unsigned NumMemRefs = 0; for (SmallVectorImpl<MachineMemOperand *>::const_iterator I = Index: lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- lib/CodeGen/TargetLoweringBase.cpp +++ lib/CodeGen/TargetLoweringBase.cpp @@ -645,6 +645,26 @@ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand); } + // Constrained floating-point operations default to expand. + setOperationAction(ISD::STRICT_FADD, VT, Expand); + setOperationAction(ISD::STRICT_FSUB, VT, Expand); + setOperationAction(ISD::STRICT_FMUL, VT, Expand); + setOperationAction(ISD::STRICT_FDIV, VT, Expand); + setOperationAction(ISD::STRICT_FREM, VT, Expand); + setOperationAction(ISD::STRICT_FMA, VT, Expand); + setOperationAction(ISD::STRICT_FSQRT, VT, Expand); + setOperationAction(ISD::STRICT_FPOW, VT, Expand); + setOperationAction(ISD::STRICT_FPOWI, VT, Expand); + setOperationAction(ISD::STRICT_FSIN, VT, Expand); + setOperationAction(ISD::STRICT_FCOS, VT, Expand); + setOperationAction(ISD::STRICT_FEXP, VT, Expand); + setOperationAction(ISD::STRICT_FEXP2, VT, Expand); + setOperationAction(ISD::STRICT_FLOG, VT, Expand); + setOperationAction(ISD::STRICT_FLOG10, VT, Expand); + setOperationAction(ISD::STRICT_FLOG2, VT, Expand); + setOperationAction(ISD::STRICT_FRINT, VT, Expand); + setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand); + // For most targets @llvm.get.dynamic.area.offset just returns 0.
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand); } Index: lib/Target/AMDGPU/R600InstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/R600InstrInfo.cpp +++ lib/Target/AMDGPU/R600InstrInfo.cpp @@ -1503,6 +1503,7 @@ case PseudoSourceValue::ConstantPool: case PseudoSourceValue::GOT: case PseudoSourceValue::JumpTable: + case PseudoSourceValue::FPStatus: case PseudoSourceValue::GlobalValueCallEntry: case PseudoSourceValue::ExternalSymbolCallEntry: case PseudoSourceValue::TargetCustom: Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1904,6 +1904,7 @@ case PseudoSourceValue::ConstantPool: case PseudoSourceValue::GOT: case PseudoSourceValue::JumpTable: + case PseudoSourceValue::FPStatus: case PseudoSourceValue::GlobalValueCallEntry: case PseudoSourceValue::ExternalSymbolCallEntry: case PseudoSourceValue::TargetCustom: Index: lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.cpp +++ lib/Target/SystemZ/SystemZISelLowering.cpp @@ -401,6 +401,18 @@ setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); + + // Handle constrained floating-point operations. 
+ setOperationAction(ISD::STRICT_FADD, VT, Legal); + setOperationAction(ISD::STRICT_FSUB, VT, Legal); + setOperationAction(ISD::STRICT_FMUL, VT, Legal); + setOperationAction(ISD::STRICT_FDIV, VT, Legal); + setOperationAction(ISD::STRICT_FMA, VT, Legal); + setOperationAction(ISD::STRICT_FSQRT, VT, Legal); + setOperationAction(ISD::STRICT_FRINT, VT, Legal); + if (Subtarget.hasFPExtension()) { + setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); + } } } Index: lib/Target/SystemZ/SystemZInstrDFP.td =================================================================== --- lib/Target/SystemZ/SystemZInstrDFP.td +++ lib/Target/SystemZ/SystemZInstrDFP.td @@ -20,7 +20,7 @@ //===----------------------------------------------------------------------===// // Load and test. -let Defs = [CC] in { +let Uses = [FPC], Defs = [CC] in { def LTDTR : UnaryRRE<"ltdtr", 0xB3D6, null_frag, FP64, FP64>; def LTXTR : UnaryRRE<"ltxtr", 0xB3DE, null_frag, FP128, FP128>; } @@ -32,25 +32,31 @@ // Convert floating-point values to narrower representations. The destination // of LDXTR is a 128-bit value, but only the first register of the pair is used. -def LEDTR : TernaryRRFe<"ledtr", 0xB3D5, FP32, FP64>; -def LDXTR : TernaryRRFe<"ldxtr", 0xB3DD, FP128, FP128>; +let Uses = [FPC] in { + def LEDTR : TernaryRRFe<"ledtr", 0xB3D5, FP32, FP64>; + def LDXTR : TernaryRRFe<"ldxtr", 0xB3DD, FP128, FP128>; +} // Extend floating-point values to wider representations. -def LDETR : BinaryRRFd<"ldetr", 0xB3D4, FP64, FP32>; -def LXDTR : BinaryRRFd<"lxdtr", 0xB3DC, FP128, FP64>; +let Uses = [FPC] in { + def LDETR : BinaryRRFd<"ldetr", 0xB3D4, FP64, FP32>; + def LXDTR : BinaryRRFd<"lxdtr", 0xB3DC, FP128, FP64>; +} // Convert a signed integer value to a floating-point one. 
-def CDGTR : UnaryRRE<"cdgtr", 0xB3F1, null_frag, FP64, GR64>; -def CXGTR : UnaryRRE<"cxgtr", 0xB3F9, null_frag, FP128, GR64>; -let Predicates = [FeatureFPExtension] in { - def CDGTRA : TernaryRRFe<"cdgtra", 0xB3F1, FP64, GR64>; - def CXGTRA : TernaryRRFe<"cxgtra", 0xB3F9, FP128, GR64>; - def CDFTR : TernaryRRFe<"cdftr", 0xB951, FP64, GR32>; - def CXFTR : TernaryRRFe<"cxftr", 0xB959, FP128, GR32>; +let Uses = [FPC] in { + def CDGTR : UnaryRRE<"cdgtr", 0xB3F1, null_frag, FP64, GR64>; + def CXGTR : UnaryRRE<"cxgtr", 0xB3F9, null_frag, FP128, GR64>; + let Predicates = [FeatureFPExtension] in { + def CDGTRA : TernaryRRFe<"cdgtra", 0xB3F1, FP64, GR64>; + def CXGTRA : TernaryRRFe<"cxgtra", 0xB3F9, FP128, GR64>; + def CDFTR : TernaryRRFe<"cdftr", 0xB951, FP64, GR32>; + def CXFTR : TernaryRRFe<"cxftr", 0xB959, FP128, GR32>; + } } // Convert an unsigned integer value to a floating-point one. -let Predicates = [FeatureFPExtension] in { +let Uses = [FPC], Predicates = [FeatureFPExtension] in { def CDLGTR : TernaryRRFe<"cdlgtr", 0xB952, FP64, GR64>; def CXLGTR : TernaryRRFe<"cxlgtr", 0xB95A, FP128, GR64>; def CDLFTR : TernaryRRFe<"cdlftr", 0xB953, FP64, GR32>; @@ -58,7 +64,7 @@ } // Convert a floating-point value to a signed integer value. -let Defs = [CC] in { +let Uses = [FPC], Defs = [CC] in { def CGDTR : BinaryRRFe<"cgdtr", 0xB3E1, GR64, FP64>; def CGXTR : BinaryRRFe<"cgxtr", 0xB3E9, GR64, FP128>; let Predicates = [FeatureFPExtension] in { @@ -70,7 +76,7 @@ } // Convert a floating-point value to an unsigned integer value. -let Defs = [CC] in { +let Uses = [FPC], Defs = [CC] in { let Predicates = [FeatureFPExtension] in { def CLGDTR : TernaryRRFe<"clgdtr", 0xB942, GR64, FP64>; def CLGXTR : TernaryRRFe<"clgxtr", 0xB94A, GR64, FP128>; @@ -108,7 +114,7 @@ } // Perform floating-point operation. 
-let Defs = [CC, R1L, F0Q], Uses = [R0L, F4Q] in +let Defs = [CC, R1L, F0Q], Uses = [FPC, R0L, F4Q] in def PFPO : SideEffectInherentE<"pfpo", 0x010A>; @@ -118,8 +124,10 @@ // Round to an integer, with the second operand (M3) specifying the rounding // mode. M4 can be set to 4 to suppress detection of inexact conditions. -def FIDTR : TernaryRRFe<"fidtr", 0xB3D7, FP64, FP64>; -def FIXTR : TernaryRRFe<"fixtr", 0xB3DF, FP128, FP128>; +let Uses = [FPC] in { + def FIDTR : TernaryRRFe<"fidtr", 0xB3D7, FP64, FP64>; + def FIXTR : TernaryRRFe<"fixtr", 0xB3DF, FP128, FP128>; +} // Extract biased exponent. def EEDTR : UnaryRRE<"eedtr", 0xB3E5, null_frag, FP64, FP64>; @@ -135,7 +143,7 @@ //===----------------------------------------------------------------------===// // Addition. -let Defs = [CC] in { +let Uses = [FPC], Defs = [CC] in { let isCommutable = 1 in { def ADTR : BinaryRRFa<"adtr", 0xB3D2, null_frag, FP64, FP64, FP64>; def AXTR : BinaryRRFa<"axtr", 0xB3DA, null_frag, FP128, FP128, FP128>; @@ -147,7 +155,7 @@ } // Subtraction. -let Defs = [CC] in { +let Uses = [FPC], Defs = [CC] in { def SDTR : BinaryRRFa<"sdtr", 0xB3D3, null_frag, FP64, FP64, FP64>; def SXTR : BinaryRRFa<"sxtr", 0xB3DB, null_frag, FP128, FP128, FP128>; let Predicates = [FeatureFPExtension] in { @@ -157,30 +165,38 @@ } // Multiplication. 
-let isCommutable = 1 in { - def MDTR : BinaryRRFa<"mdtr", 0xB3D0, null_frag, FP64, FP64, FP64>; - def MXTR : BinaryRRFa<"mxtr", 0xB3D8, null_frag, FP128, FP128, FP128>; -} -let Predicates = [FeatureFPExtension] in { - def MDTRA : TernaryRRFa<"mdtra", 0xB3D0, FP64, FP64, FP64>; - def MXTRA : TernaryRRFa<"mxtra", 0xB3D8, FP128, FP128, FP128>; +let Uses = [FPC] in { + let isCommutable = 1 in { + def MDTR : BinaryRRFa<"mdtr", 0xB3D0, null_frag, FP64, FP64, FP64>; + def MXTR : BinaryRRFa<"mxtr", 0xB3D8, null_frag, FP128, FP128, FP128>; + } + let Predicates = [FeatureFPExtension] in { + def MDTRA : TernaryRRFa<"mdtra", 0xB3D0, FP64, FP64, FP64>; + def MXTRA : TernaryRRFa<"mxtra", 0xB3D8, FP128, FP128, FP128>; + } } // Division. -def DDTR : BinaryRRFa<"ddtr", 0xB3D1, null_frag, FP64, FP64, FP64>; -def DXTR : BinaryRRFa<"dxtr", 0xB3D9, null_frag, FP128, FP128, FP128>; -let Predicates = [FeatureFPExtension] in { - def DDTRA : TernaryRRFa<"ddtra", 0xB3D1, FP64, FP64, FP64>; - def DXTRA : TernaryRRFa<"dxtra", 0xB3D9, FP128, FP128, FP128>; +let Uses = [FPC] in { + def DDTR : BinaryRRFa<"ddtr", 0xB3D1, null_frag, FP64, FP64, FP64>; + def DXTR : BinaryRRFa<"dxtr", 0xB3D9, null_frag, FP128, FP128, FP128>; + let Predicates = [FeatureFPExtension] in { + def DDTRA : TernaryRRFa<"ddtra", 0xB3D1, FP64, FP64, FP64>; + def DXTRA : TernaryRRFa<"dxtra", 0xB3D9, FP128, FP128, FP128>; + } } // Quantize. -def QADTR : TernaryRRFb<"qadtr", 0xB3F5, FP64, FP64, FP64>; -def QAXTR : TernaryRRFb<"qaxtr", 0xB3FD, FP128, FP128, FP128>; +let Uses = [FPC] in { + def QADTR : TernaryRRFb<"qadtr", 0xB3F5, FP64, FP64, FP64>; + def QAXTR : TernaryRRFb<"qaxtr", 0xB3FD, FP128, FP128, FP128>; +} // Reround. 
-def RRDTR : TernaryRRFb<"rrdtr", 0xB3F7, FP64, FP64, FP64>; -def RRXTR : TernaryRRFb<"rrxtr", 0xB3FF, FP128, FP128, FP128>; +let Uses = [FPC] in { + def RRDTR : TernaryRRFb<"rrdtr", 0xB3F7, FP64, FP64, FP64>; + def RRXTR : TernaryRRFb<"rrxtr", 0xB3FF, FP128, FP128, FP128>; +} // Shift significand left/right. def SLDT : BinaryRXF<"sldt", 0xED40, null_frag, FP64, FP64, null_frag, 0>; @@ -198,13 +214,13 @@ //===----------------------------------------------------------------------===// // Compare. -let Defs = [CC] in { +let Uses = [FPC], Defs = [CC] in { def CDTR : CompareRRE<"cdtr", 0xB3E4, null_frag, FP64, FP64>; def CXTR : CompareRRE<"cxtr", 0xB3EC, null_frag, FP128, FP128>; } // Compare and signal. -let Defs = [CC] in { +let Uses = [FPC], Defs = [CC] in { def KDTR : CompareRRE<"kdtr", 0xB3E0, null_frag, FP64, FP64>; def KXTR : CompareRRE<"kxtr", 0xB3E8, null_frag, FP128, FP128>; } Index: lib/Target/SystemZ/SystemZInstrFP.td =================================================================== --- lib/Target/SystemZ/SystemZInstrFP.td +++ lib/Target/SystemZ/SystemZInstrFP.td @@ -53,7 +53,7 @@ // Moves between two floating-point registers that also set the condition // codes. -let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { +let Uses = [FPC], Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { defm LTEBR : LoadAndTestRRE<"ltebr", 0xB302, FP32>; defm LTDBR : LoadAndTestRRE<"ltdbr", 0xB312, FP64>; defm LTXBR : LoadAndTestRRE<"ltxbr", 0xB342, FP128>; @@ -69,7 +69,7 @@ // Use a normal load-and-test for compare against zero in case of // vector support (via a pseudo to simplify instruction selection). 
-let Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { +let Uses = [FPC], Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { def LTEBRCompare_VecPseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>; def LTDBRCompare_VecPseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>; def LTXBRCompare_VecPseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>; @@ -174,16 +174,18 @@ // Convert floating-point values to narrower representations, rounding // according to the current mode. The destination of LEXBR and LDXBR // is a 128-bit value, but only the first register of the pair is used. -def LEDBR : UnaryRRE<"ledbr", 0xB344, fpround, FP32, FP64>; -def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>; -def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>; - -def LEDBRA : TernaryRRFe<"ledbra", 0xB344, FP32, FP64>, - Requires<[FeatureFPExtension]>; -def LEXBRA : TernaryRRFe<"lexbra", 0xB346, FP128, FP128>, - Requires<[FeatureFPExtension]>; -def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>, - Requires<[FeatureFPExtension]>; +let Uses = [FPC] in { + def LEDBR : UnaryRRE<"ledbr", 0xB344, fpround, FP32, FP64>; + def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>; + def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>; + + def LEDBRA : TernaryRRFe<"ledbra", 0xB344, FP32, FP64>, + Requires<[FeatureFPExtension]>; + def LEXBRA : TernaryRRFe<"lexbra", 0xB346, FP128, FP128>, + Requires<[FeatureFPExtension]>; + def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>, + Requires<[FeatureFPExtension]>; +} let Predicates = [FeatureNoVectorEnhancements1] in { def : Pat<(f32 (fpround FP128:$src)), @@ -193,18 +195,22 @@ } // Extend register floating-point values to wider representations. 
-def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>; -def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>; -def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>; +let Uses = [FPC] in { + def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>; + def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>; + def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>; +} let Predicates = [FeatureNoVectorEnhancements1] in { def : Pat<(f128 (fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>; def : Pat<(f128 (fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>; } // Extend memory floating-point values to wider representations. -def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>; -def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>; -def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>; +let Uses = [FPC] in { + def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>; + def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>; + def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>; +} let Predicates = [FeatureNoVectorEnhancements1] in { def : Pat<(f128 (extloadf32 bdxaddr12only:$src)), (LXEB bdxaddr12only:$src)>; @@ -213,17 +219,19 @@ } // Convert a signed integer register value to a floating-point one. 
-def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>; -def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>; -def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>; - -def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32, GR64>; -def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64, GR64>; -def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>; +let Uses = [FPC] in { + def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>; + def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>; + def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>; + + def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32, GR64>; + def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64, GR64>; + def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>; +} // The FP extension feature provides versions of the above that allow // specifying rounding mode and inexact-exception suppression flags. -let Predicates = [FeatureFPExtension] in { +let Uses = [FPC], Predicates = [FeatureFPExtension] in { def CEFBRA : TernaryRRFe<"cefbra", 0xB394, FP32, GR32>; def CDFBRA : TernaryRRFe<"cdfbra", 0xB395, FP64, GR32>; def CXFBRA : TernaryRRFe<"cxfbra", 0xB396, FP128, GR32>; @@ -235,13 +243,15 @@ // Convert am unsigned integer register value to a floating-point one. 
let Predicates = [FeatureFPExtension] in { - def CELFBR : TernaryRRFe<"celfbr", 0xB390, FP32, GR32>; - def CDLFBR : TernaryRRFe<"cdlfbr", 0xB391, FP64, GR32>; - def CXLFBR : TernaryRRFe<"cxlfbr", 0xB392, FP128, GR32>; - - def CELGBR : TernaryRRFe<"celgbr", 0xB3A0, FP32, GR64>; - def CDLGBR : TernaryRRFe<"cdlgbr", 0xB3A1, FP64, GR64>; - def CXLGBR : TernaryRRFe<"cxlgbr", 0xB3A2, FP128, GR64>; + let Uses = [FPC] in { + def CELFBR : TernaryRRFe<"celfbr", 0xB390, FP32, GR32>; + def CDLFBR : TernaryRRFe<"cdlfbr", 0xB391, FP64, GR32>; + def CXLFBR : TernaryRRFe<"cxlfbr", 0xB392, FP128, GR32>; + + def CELGBR : TernaryRRFe<"celgbr", 0xB3A0, FP32, GR64>; + def CDLGBR : TernaryRRFe<"cdlgbr", 0xB3A1, FP64, GR64>; + def CXLGBR : TernaryRRFe<"cxlgbr", 0xB3A2, FP128, GR64>; + } def : Pat<(f32 (uint_to_fp GR32:$src)), (CELFBR 0, GR32:$src, 0)>; def : Pat<(f64 (uint_to_fp GR32:$src)), (CDLFBR 0, GR32:$src, 0)>; @@ -254,7 +264,7 @@ // Convert a floating-point register value to a signed integer value, // with the second operand (modifier M3) specifying the rounding mode. -let Defs = [CC] in { +let Uses = [FPC], Defs = [CC] in { def CFEBR : BinaryRRFe<"cfebr", 0xB398, GR32, FP32>; def CFDBR : BinaryRRFe<"cfdbr", 0xB399, GR32, FP64>; def CFXBR : BinaryRRFe<"cfxbr", 0xB39A, GR32, FP128>; @@ -275,7 +285,7 @@ // The FP extension feature provides versions of the above that allow // also specifying the inexact-exception suppression flag. -let Predicates = [FeatureFPExtension], Defs = [CC] in { +let Uses = [FPC], Predicates = [FeatureFPExtension], Defs = [CC] in { def CFEBRA : TernaryRRFe<"cfebra", 0xB398, GR32, FP32>; def CFDBRA : TernaryRRFe<"cfdbra", 0xB399, GR32, FP64>; def CFXBRA : TernaryRRFe<"cfxbra", 0xB39A, GR32, FP128>; @@ -287,7 +297,7 @@ // Convert a floating-point register value to an unsigned integer value. 
let Predicates = [FeatureFPExtension] in { - let Defs = [CC] in { + let Uses = [FPC], Defs = [CC] in { def CLFEBR : TernaryRRFe<"clfebr", 0xB39C, GR32, FP32>; def CLFDBR : TernaryRRFe<"clfdbr", 0xB39D, GR32, FP64>; def CLFXBR : TernaryRRFe<"clfxbr", 0xB39E, GR32, FP128>; @@ -353,36 +363,42 @@ def LNDFR_32 : UnaryRRE<"lndfr", 0xB371, fnabs, FP32, FP32>; // Square root. -def SQEBR : UnaryRRE<"sqebr", 0xB314, fsqrt, FP32, FP32>; -def SQDBR : UnaryRRE<"sqdbr", 0xB315, fsqrt, FP64, FP64>; -def SQXBR : UnaryRRE<"sqxbr", 0xB316, fsqrt, FP128, FP128>; +let Uses = [FPC], mayAccessMemory = 1 in { + def SQEBR : UnaryRRE<"sqebr", 0xB314, any_fsqrt, FP32, FP32>; + def SQDBR : UnaryRRE<"sqdbr", 0xB315, any_fsqrt, FP64, FP64>; + def SQXBR : UnaryRRE<"sqxbr", 0xB316, any_fsqrt, FP128, FP128>; -def SQEB : UnaryRXE<"sqeb", 0xED14, loadu, FP32, 4>; -def SQDB : UnaryRXE<"sqdb", 0xED15, loadu, FP64, 8>; + def SQEB : UnaryRXE<"sqeb", 0xED14, loadu, FP32, 4>; + def SQDB : UnaryRXE<"sqdb", 0xED15, loadu, FP64, 8>; +} // Round to an integer, with the second operand (modifier M3) specifying // the rounding mode. These forms always check for inexact conditions. -def FIEBR : BinaryRRFe<"fiebr", 0xB357, FP32, FP32>; -def FIDBR : BinaryRRFe<"fidbr", 0xB35F, FP64, FP64>; -def FIXBR : BinaryRRFe<"fixbr", 0xB347, FP128, FP128>; +let Uses = [FPC], mayAccessMemory = 1 in { + def FIEBR : BinaryRRFe<"fiebr", 0xB357, FP32, FP32>; + def FIDBR : BinaryRRFe<"fidbr", 0xB35F, FP64, FP64>; + def FIXBR : BinaryRRFe<"fixbr", 0xB347, FP128, FP128>; +} // frint rounds according to the current mode (modifier 0) and detects // inexact conditions. 
-def : Pat<(frint FP32:$src), (FIEBR 0, FP32:$src)>; -def : Pat<(frint FP64:$src), (FIDBR 0, FP64:$src)>; -def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>; +def : Pat<(any_frint FP32:$src), (FIEBR 0, FP32:$src)>; +def : Pat<(any_frint FP64:$src), (FIDBR 0, FP64:$src)>; +def : Pat<(any_frint FP128:$src), (FIXBR 0, FP128:$src)>; let Predicates = [FeatureFPExtension] in { // Extended forms of the FIxBR instructions. M4 can be set to 4 // to suppress detection of inexact conditions. - def FIEBRA : TernaryRRFe<"fiebra", 0xB357, FP32, FP32>; - def FIDBRA : TernaryRRFe<"fidbra", 0xB35F, FP64, FP64>; - def FIXBRA : TernaryRRFe<"fixbra", 0xB347, FP128, FP128>; + let Uses = [FPC], mayAccessMemory = 1 in { + def FIEBRA : TernaryRRFe<"fiebra", 0xB357, FP32, FP32>; + def FIDBRA : TernaryRRFe<"fidbra", 0xB35F, FP64, FP64>; + def FIXBRA : TernaryRRFe<"fixbra", 0xB347, FP128, FP128>; + } // fnearbyint is like frint but does not detect inexact conditions. - def : Pat<(fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>; - def : Pat<(fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>; - def : Pat<(fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>; + def : Pat<(any_fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>; + def : Pat<(any_fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>; + def : Pat<(any_fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>; // floor is no longer allowed to raise an inexact condition, // so restrict it to the cases where the condition can be suppressed. @@ -413,87 +429,102 @@ //===----------------------------------------------------------------------===// // Addition. 
-let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { +let Uses = [FPC], mayAccessMemory = 1, + Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { let isCommutable = 1 in { - def AEBR : BinaryRRE<"aebr", 0xB30A, fadd, FP32, FP32>; - def ADBR : BinaryRRE<"adbr", 0xB31A, fadd, FP64, FP64>; - def AXBR : BinaryRRE<"axbr", 0xB34A, fadd, FP128, FP128>; + def AEBR : BinaryRRE<"aebr", 0xB30A, any_fadd, FP32, FP32>; + def ADBR : BinaryRRE<"adbr", 0xB31A, any_fadd, FP64, FP64>; + def AXBR : BinaryRRE<"axbr", 0xB34A, any_fadd, FP128, FP128>; } - def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load, 4>; - def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load, 8>; + def AEB : BinaryRXE<"aeb", 0xED0A, any_fadd, FP32, load, 4>; + def ADB : BinaryRXE<"adb", 0xED1A, any_fadd, FP64, load, 8>; } // Subtraction. -let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { - def SEBR : BinaryRRE<"sebr", 0xB30B, fsub, FP32, FP32>; - def SDBR : BinaryRRE<"sdbr", 0xB31B, fsub, FP64, FP64>; - def SXBR : BinaryRRE<"sxbr", 0xB34B, fsub, FP128, FP128>; +let Uses = [FPC], mayAccessMemory = 1, + Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + def SEBR : BinaryRRE<"sebr", 0xB30B, any_fsub, FP32, FP32>; + def SDBR : BinaryRRE<"sdbr", 0xB31B, any_fsub, FP64, FP64>; + def SXBR : BinaryRRE<"sxbr", 0xB34B, any_fsub, FP128, FP128>; - def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load, 4>; - def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load, 8>; + def SEB : BinaryRXE<"seb", 0xED0B, any_fsub, FP32, load, 4>; + def SDB : BinaryRXE<"sdb", 0xED1B, any_fsub, FP64, load, 8>; } // Multiplication. 
-let isCommutable = 1 in { - def MEEBR : BinaryRRE<"meebr", 0xB317, fmul, FP32, FP32>; - def MDBR : BinaryRRE<"mdbr", 0xB31C, fmul, FP64, FP64>; - def MXBR : BinaryRRE<"mxbr", 0xB34C, fmul, FP128, FP128>; +let Uses = [FPC], mayAccessMemory = 1 in { + let isCommutable = 1 in { + def MEEBR : BinaryRRE<"meebr", 0xB317, any_fmul, FP32, FP32>; + def MDBR : BinaryRRE<"mdbr", 0xB31C, any_fmul, FP64, FP64>; + def MXBR : BinaryRRE<"mxbr", 0xB34C, any_fmul, FP128, FP128>; + } + def MEEB : BinaryRXE<"meeb", 0xED17, any_fmul, FP32, load, 4>; + def MDB : BinaryRXE<"mdb", 0xED1C, any_fmul, FP64, load, 8>; } -def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load, 4>; -def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>; // f64 multiplication of two FP32 registers. -def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>; -def : Pat<(fmul (f64 (fpextend FP32:$src1)), (f64 (fpextend FP32:$src2))), +let Uses = [FPC], mayAccessMemory = 1 in + def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>; +def : Pat<(any_fmul (f64 (fpextend FP32:$src1)), (f64 (fpextend FP32:$src2))), (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_r32), FP32:$src2)>; // f64 multiplication of an FP32 register and an f32 memory. -def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>; -def : Pat<(fmul (f64 (fpextend FP32:$src1)), - (f64 (extloadf32 bdxaddr12only:$addr))), +let Uses = [FPC], mayAccessMemory = 1 in + def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>; +def : Pat<(any_fmul (f64 (fpextend FP32:$src1)), + (f64 (extloadf32 bdxaddr12only:$addr))), (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_r32), bdxaddr12only:$addr)>; // f128 multiplication of two FP64 registers. 
-def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>; +let Uses = [FPC], mayAccessMemory = 1 in + def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>; let Predicates = [FeatureNoVectorEnhancements1] in - def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))), + def : Pat<(any_fmul (f128 (fpextend FP64:$src1)), + (f128 (fpextend FP64:$src2))), (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64), FP64:$src2)>; // f128 multiplication of an FP64 register and an f64 memory. -def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>; +let Uses = [FPC], mayAccessMemory = 1 in + def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>; let Predicates = [FeatureNoVectorEnhancements1] in - def : Pat<(fmul (f128 (fpextend FP64:$src1)), - (f128 (extloadf64 bdxaddr12only:$addr))), + def : Pat<(any_fmul (f128 (fpextend FP64:$src1)), + (f128 (extloadf64 bdxaddr12only:$addr))), (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64), bdxaddr12only:$addr)>; // Fused multiply-add. -def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>; -def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64, FP64>; +let Uses = [FPC], mayAccessMemory = 1 in { + def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>; + def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>; -def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, FP32, load, 4>; -def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, FP64, load, 8>; + def MAEB : TernaryRXF<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>; + def MADB : TernaryRXF<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>; +} // Fused multiply-subtract. 
-def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32, FP32>; -def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64, FP64>; +let Uses = [FPC], mayAccessMemory = 1 in { + def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>; + def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>; -def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, FP32, load, 4>; -def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, FP64, load, 8>; + def MSEB : TernaryRXF<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>; + def MSDB : TernaryRXF<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>; +} // Division. -def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>; -def DDBR : BinaryRRE<"ddbr", 0xB31D, fdiv, FP64, FP64>; -def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>; +let Uses = [FPC], mayAccessMemory = 1 in { + def DEBR : BinaryRRE<"debr", 0xB30D, any_fdiv, FP32, FP32>; + def DDBR : BinaryRRE<"ddbr", 0xB31D, any_fdiv, FP64, FP64>; + def DXBR : BinaryRRE<"dxbr", 0xB34D, any_fdiv, FP128, FP128>; -def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load, 4>; -def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>; + def DEB : BinaryRXE<"deb", 0xED0D, any_fdiv, FP32, load, 4>; + def DDB : BinaryRXE<"ddb", 0xED1D, any_fdiv, FP64, load, 8>; +} // Divide to integer. 
-let Defs = [CC] in { +let Uses = [FPC], Defs = [CC] in { def DIEBR : TernaryRRFb<"diebr", 0xB353, FP32, FP32, FP32>; def DIDBR : TernaryRRFb<"didbr", 0xB35B, FP64, FP64, FP64>; } @@ -502,7 +533,7 @@ // Comparisons //===----------------------------------------------------------------------===// -let Defs = [CC], CCValues = 0xF in { +let Uses = [FPC], Defs = [CC], CCValues = 0xF in { def CEBR : CompareRRE<"cebr", 0xB309, z_fcmp, FP32, FP32>; def CDBR : CompareRRE<"cdbr", 0xB319, z_fcmp, FP64, FP64>; def CXBR : CompareRRE<"cxbr", 0xB349, z_fcmp, FP128, FP128>; @@ -532,20 +563,28 @@ let hasSideEffects = 1 in { let mayLoad = 1, mayStore = 1 in { // TODO: EFPC and SFPC do not touch memory at all - def EFPC : InherentRRE<"efpc", 0xB38C, GR32, int_s390_efpc>; - def STFPC : StoreInherentS<"stfpc", 0xB29C, storei, 4>; - - def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>; - def LFPC : SideEffectUnaryS<"lfpc", 0xB29D, loadu, 4>; + let Uses = [FPC] in { + def EFPC : InherentRRE<"efpc", 0xB38C, GR32, int_s390_efpc>; + def STFPC : StoreInherentS<"stfpc", 0xB29C, storei, 4>; + } + + let Defs = [FPC] in { + def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>; + def LFPC : SideEffectUnaryS<"lfpc", 0xB29D, loadu, 4>; + } } - def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>; - def LFAS : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>; + let Defs = [FPC] in { + def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>; + def LFAS : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>; + } - def SRNMB : SideEffectAddressS<"srnmb", 0xB2B8, null_frag, shift12only>, - Requires<[FeatureFPExtension]>; - def SRNM : SideEffectAddressS<"srnm", 0xB299, null_frag, shift12only>; - def SRNMT : SideEffectAddressS<"srnmt", 0xB2B9, null_frag, shift12only>; + let Uses = [FPC], Defs = [FPC] in { + def SRNMB : SideEffectAddressS<"srnmb", 0xB2B8, null_frag, shift12only>, + Requires<[FeatureFPExtension]>; + def SRNM : SideEffectAddressS<"srnm", 0xB299, 
null_frag, shift12only>; + def SRNMT : SideEffectAddressS<"srnmt", 0xB2B9, null_frag, shift12only>; + } } //===----------------------------------------------------------------------===// Index: lib/Target/SystemZ/SystemZInstrVector.td =================================================================== --- lib/Target/SystemZ/SystemZInstrVector.td +++ lib/Target/SystemZ/SystemZInstrVector.td @@ -925,8 +925,8 @@ // See comments in SystemZInstrFP.td for the suppression flags and // rounding modes. multiclass VectorRounding { - def : FPConversion; - def : FPConversion; + def : FPConversion; + def : FPConversion; def : FPConversion; def : FPConversion; def : FPConversion; @@ -935,94 +935,118 @@ let Predicates = [FeatureVector] in { // Add. - def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>; - def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>; - def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>; - let Predicates = [FeatureVectorEnhancements1] in { - def VFASB : BinaryVRRc<"vfasb", 0xE7E3, fadd, v128sb, v128sb, 2, 0>; - def WFASB : BinaryVRRc<"wfasb", 0xE7E3, fadd, v32sb, v32sb, 2, 8>; - def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, fadd, v128xb, v128xb, 4, 8>; + let Uses = [FPC], mayAccessMemory = 1 in { + def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>; + def VFADB : BinaryVRRc<"vfadb", 0xE7E3, any_fadd, v128db, v128db, 3, 0>; + def WFADB : BinaryVRRc<"wfadb", 0xE7E3, any_fadd, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFASB : BinaryVRRc<"vfasb", 0xE7E3, any_fadd, v128sb, v128sb, 2, 0>; + def WFASB : BinaryVRRc<"wfasb", 0xE7E3, any_fadd, v32sb, v32sb, 2, 8>; + def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, any_fadd, v128xb, v128xb, 4, 8>; + } } // Convert from fixed 64-bit. 
- def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>; - def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>; - def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>; + let Uses = [FPC] in { + def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>; + def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>; + def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>; + } def : FPConversion; // Convert from logical 64-bit. - def VCDLG : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>; - def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>; - def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>; + let Uses = [FPC] in { + def VCDLG : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>; + def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>; + def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>; + } def : FPConversion; // Convert to fixed 64-bit. - def VCGD : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>; - def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>; - def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>; + let Uses = [FPC] in { + def VCGD : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>; + def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>; + def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>; + } // Rounding mode should agree with SystemZInstrFP.td. def : FPConversion; // Convert to logical 64-bit. 
- def VCLGD : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>; - def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>; - def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>; + let Uses = [FPC] in { + def VCLGD : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>; + def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>; + def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>; + } // Rounding mode should agree with SystemZInstrFP.td. def : FPConversion; // Divide. - def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>; - def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>; - def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>; - let Predicates = [FeatureVectorEnhancements1] in { - def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, fdiv, v128sb, v128sb, 2, 0>; - def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, fdiv, v32sb, v32sb, 2, 8>; - def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, fdiv, v128xb, v128xb, 4, 8>; + let Uses = [FPC], mayAccessMemory = 1 in { + def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>; + def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, any_fdiv, v128db, v128db, 3, 0>; + def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, any_fdiv, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, any_fdiv, v128sb, v128sb, 2, 0>; + def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, any_fdiv, v32sb, v32sb, 2, 8>; + def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, any_fdiv, v128xb, v128xb, 4, 8>; + } } // Load FP integer. 
- def VFI : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>; - def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>; - def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>; + let Uses = [FPC], mayAccessMemory = 1 in { + def VFI : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>; + def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>; + def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>; + } defm : VectorRounding; defm : VectorRounding; let Predicates = [FeatureVectorEnhancements1] in { - def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>; - def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>; - def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>; + let Uses = [FPC], mayAccessMemory = 1 in { + def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>; + def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>; + def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>; + } defm : VectorRounding; defm : VectorRounding; defm : VectorRounding; } // Load lengthened. 
- def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>; - def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>; - def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32sb, 2, 8>; + let Uses = [FPC] in { + def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>; + def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>; + def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32sb, 2, 8>; + } let Predicates = [FeatureVectorEnhancements1] in { - let isAsmParserOnly = 1 in { - def VFLL : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>; - def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>; - def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>; + let Uses = [FPC] in { + let isAsmParserOnly = 1 in { + def VFLL : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>; + def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>; + def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>; + } + def WFLLD : UnaryVRRa<"wflld", 0xE7C4, fpextend, v128xb, v64db, 3, 8>; } - def WFLLD : UnaryVRRa<"wflld", 0xE7C4, fpextend, v128xb, v64db, 3, 8>; def : Pat<(f128 (fpextend (f32 VR32:$src))), (WFLLD (WLDEB VR32:$src))>; } // Load rounded. 
- def VLED : TernaryVRRaFloatGeneric<"vled", 0xE7C5>; - def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; - def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; + let Uses = [FPC] in { + def VLED : TernaryVRRaFloatGeneric<"vled", 0xE7C5>; + def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; + def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; + } def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>; def : FPConversion; let Predicates = [FeatureVectorEnhancements1] in { - let isAsmParserOnly = 1 in { - def VFLR : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>; - def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; - def WFLRD : TernaryVRRa<"wflrd", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; + let Uses = [FPC] in { + let isAsmParserOnly = 1 in { + def VFLR : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>; + def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; + def WFLRD : TernaryVRRa<"wflrd", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; + } + def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>; } - def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>; def : FPConversion; def : Pat<(f32 (fpround (f128 VR128:$src))), (WLEDB (WFLRX VR128:$src, 0, 3), 0, 0)>; @@ -1034,17 +1058,19 @@ def : FPMinMax; } let Predicates = [FeatureVectorEnhancements1] in { - def VFMAX : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>; - def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb, - v128db, v128db, 3, 0>; - def WFMAXDB : TernaryVRRcFloat<"wfmaxdb", 0xE7EF, null_frag, - v64db, v64db, 3, 8>; - def VFMAXSB : TernaryVRRcFloat<"vfmaxsb", 0xE7EF, int_s390_vfmaxsb, - v128sb, v128sb, 2, 0>; - def WFMAXSB : TernaryVRRcFloat<"wfmaxsb", 0xE7EF, null_frag, - v32sb, v32sb, 2, 8>; - def WFMAXXB : TernaryVRRcFloat<"wfmaxxb", 0xE7EF, null_frag, - v128xb, v128xb, 4, 8>; + let Uses = [FPC] in { + def VFMAX : 
TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>; + def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb, + v128db, v128db, 3, 0>; + def WFMAXDB : TernaryVRRcFloat<"wfmaxdb", 0xE7EF, null_frag, + v64db, v64db, 3, 8>; + def VFMAXSB : TernaryVRRcFloat<"vfmaxsb", 0xE7EF, int_s390_vfmaxsb, + v128sb, v128sb, 2, 0>; + def WFMAXSB : TernaryVRRcFloat<"wfmaxsb", 0xE7EF, null_frag, + v32sb, v32sb, 2, 8>; + def WFMAXXB : TernaryVRRcFloat<"wfmaxxb", 0xE7EF, null_frag, + v128xb, v128xb, 4, 8>; + } defm : VectorMax; defm : VectorMax; defm : VectorMax; @@ -1058,17 +1084,19 @@ def : FPMinMax; } let Predicates = [FeatureVectorEnhancements1] in { - def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>; - def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb, - v128db, v128db, 3, 0>; - def WFMINDB : TernaryVRRcFloat<"wfmindb", 0xE7EE, null_frag, - v64db, v64db, 3, 8>; - def VFMINSB : TernaryVRRcFloat<"vfminsb", 0xE7EE, int_s390_vfminsb, - v128sb, v128sb, 2, 0>; - def WFMINSB : TernaryVRRcFloat<"wfminsb", 0xE7EE, null_frag, - v32sb, v32sb, 2, 8>; - def WFMINXB : TernaryVRRcFloat<"wfminxb", 0xE7EE, null_frag, - v128xb, v128xb, 4, 8>; + let Uses = [FPC] in { + def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>; + def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb, + v128db, v128db, 3, 0>; + def WFMINDB : TernaryVRRcFloat<"wfmindb", 0xE7EE, null_frag, + v64db, v64db, 3, 8>; + def VFMINSB : TernaryVRRcFloat<"vfminsb", 0xE7EE, int_s390_vfminsb, + v128sb, v128sb, 2, 0>; + def WFMINSB : TernaryVRRcFloat<"wfminsb", 0xE7EE, null_frag, + v32sb, v32sb, 2, 8>; + def WFMINXB : TernaryVRRcFloat<"wfminxb", 0xE7EE, null_frag, + v128xb, v128xb, 4, 8>; + } defm : VectorMin; defm : VectorMin; defm : VectorMin; @@ -1077,53 +1105,61 @@ } // Multiply. 
- def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>; - def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>; - def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>; - let Predicates = [FeatureVectorEnhancements1] in { - def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, fmul, v128sb, v128sb, 2, 0>; - def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, fmul, v32sb, v32sb, 2, 8>; - def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, fmul, v128xb, v128xb, 4, 8>; + let Uses = [FPC], mayAccessMemory = 1 in { + def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>; + def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, any_fmul, v128db, v128db, 3, 0>; + def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, any_fmul, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, any_fmul, v128sb, v128sb, 2, 0>; + def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, any_fmul, v32sb, v32sb, 2, 8>; + def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, any_fmul, v128xb, v128xb, 4, 8>; + } } // Multiply and add. 
- def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>; - def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>; - def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>; - let Predicates = [FeatureVectorEnhancements1] in { - def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, fma, v128sb, v128sb, 0, 2>; - def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, fma, v32sb, v32sb, 8, 2>; - def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, fma, v128xb, v128xb, 8, 4>; + let Uses = [FPC], mayAccessMemory = 1 in { + def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>; + def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, any_fma, v128db, v128db, 0, 3>; + def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, any_fma, v64db, v64db, 8, 3>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, any_fma, v128sb, v128sb, 0, 2>; + def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, any_fma, v32sb, v32sb, 8, 2>; + def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, any_fma, v128xb, v128xb, 8, 4>; + } } // Multiply and subtract. 
- def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>; - def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>; - def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>; - let Predicates = [FeatureVectorEnhancements1] in { - def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, fms, v128sb, v128sb, 0, 2>; - def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, fms, v32sb, v32sb, 8, 2>; - def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, fms, v128xb, v128xb, 8, 4>; + let Uses = [FPC], mayAccessMemory = 1 in { + def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>; + def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, any_fms, v128db, v128db, 0, 3>; + def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, any_fms, v64db, v64db, 8, 3>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, any_fms, v128sb, v128sb, 0, 2>; + def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, any_fms, v32sb, v32sb, 8, 2>; + def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, any_fms, v128xb, v128xb, 8, 4>; + } } // Negative multiply and add. 
- let Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], mayAccessMemory = 1, + Predicates = [FeatureVectorEnhancements1] in { def VFNMA : TernaryVRReFloatGeneric<"vfnma", 0xE79F>; - def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, fnma, v128db, v128db, 0, 3>; - def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, fnma, v64db, v64db, 8, 3>; - def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, fnma, v128sb, v128sb, 0, 2>; - def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, fnma, v32sb, v32sb, 8, 2>; - def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, fnma, v128xb, v128xb, 8, 4>; + def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, any_fnma, v128db, v128db, 0, 3>; + def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, any_fnma, v64db, v64db, 8, 3>; + def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, any_fnma, v128sb, v128sb, 0, 2>; + def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, any_fnma, v32sb, v32sb, 8, 2>; + def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, any_fnma, v128xb, v128xb, 8, 4>; } // Negative multiply and subtract. 
- let Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], mayAccessMemory = 1, + Predicates = [FeatureVectorEnhancements1] in { def VFNMS : TernaryVRReFloatGeneric<"vfnms", 0xE79E>; - def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, fnms, v128db, v128db, 0, 3>; - def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, fnms, v64db, v64db, 8, 3>; - def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, fnms, v128sb, v128sb, 0, 2>; - def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, fnms, v32sb, v32sb, 8, 2>; - def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, fnms, v128xb, v128xb, 8, 4>; + def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, any_fnms, v128db, v128db, 0, 3>; + def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, any_fnms, v64db, v64db, 8, 3>; + def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, any_fnms, v128sb, v128sb, 0, 2>; + def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, any_fnms, v32sb, v32sb, 8, 2>; + def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, any_fnms, v128xb, v128xb, 8, 4>; } // Perform sign operation. @@ -1164,23 +1200,27 @@ } // Square root. 
- def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>; - def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>; - def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>; - let Predicates = [FeatureVectorEnhancements1] in { - def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, fsqrt, v128sb, v128sb, 2, 0>; - def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, fsqrt, v32sb, v32sb, 2, 8>; - def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, fsqrt, v128xb, v128xb, 4, 8>; + let Uses = [FPC], mayAccessMemory = 1 in { + def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>; + def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, any_fsqrt, v128db, v128db, 3, 0>; + def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, any_fsqrt, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, any_fsqrt, v128sb, v128sb, 2, 0>; + def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, any_fsqrt, v32sb, v32sb, 2, 8>; + def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, any_fsqrt, v128xb, v128xb, 4, 8>; + } } // Subtract. 
- def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>; - def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>; - def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>; - let Predicates = [FeatureVectorEnhancements1] in { - def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, fsub, v128sb, v128sb, 2, 0>; - def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, fsub, v32sb, v32sb, 2, 8>; - def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, fsub, v128xb, v128xb, 4, 8>; + let Uses = [FPC], mayAccessMemory = 1 in { + def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>; + def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, any_fsub, v128db, v128db, 3, 0>; + def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, any_fsub, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, any_fsub, v128sb, v128sb, 2, 0>; + def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, any_fsub, v32sb, v32sb, 2, 8>; + def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, any_fsub, v128xb, v128xb, 4, 8>; + } } // Test data class immediate. @@ -1202,7 +1242,7 @@ let Predicates = [FeatureVector] in { // Compare scalar. - let Defs = [CC] in { + let Uses = [FPC], Defs = [CC] in { def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>; def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>; let Predicates = [FeatureVectorEnhancements1] in { @@ -1212,7 +1252,7 @@ } // Compare and signal scalar. - let Defs = [CC] in { + let Uses = [FPC], Defs = [CC] in { def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>; def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>; let Predicates = [FeatureVectorEnhancements1] in { @@ -1222,22 +1262,24 @@ } // Compare equal. 
- def VFCE : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>; - defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes, - v128g, v128db, 3, 0>; - defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag, - v64g, v64db, 3, 8>; - let Predicates = [FeatureVectorEnhancements1] in { - defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes, - v128f, v128sb, 2, 0>; - defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag, - v32f, v32sb, 2, 8>; - defm WFCEXB : BinaryVRRcSPair<"wfcexb", 0xE7E8, null_frag, null_frag, - v128q, v128xb, 4, 8>; + let Uses = [FPC] in { + def VFCE : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>; + defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes, + v128g, v128db, 3, 0>; + defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag, + v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes, + v128f, v128sb, 2, 0>; + defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag, + v32f, v32sb, 2, 8>; + defm WFCEXB : BinaryVRRcSPair<"wfcexb", 0xE7E8, null_frag, null_frag, + v128q, v128xb, 4, 8>; + } } // Compare and signal equal. - let Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in { defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, null_frag, null_frag, v128g, v128db, 3, 4>; defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag, @@ -1251,22 +1293,24 @@ } // Compare high. 
- def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>; - defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs, - v128g, v128db, 3, 0>; - defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag, - v64g, v64db, 3, 8>; - let Predicates = [FeatureVectorEnhancements1] in { - defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs, - v128f, v128sb, 2, 0>; - defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag, - v32f, v32sb, 2, 8>; - defm WFCHXB : BinaryVRRcSPair<"wfchxb", 0xE7EB, null_frag, null_frag, - v128q, v128xb, 4, 8>; + let Uses = [FPC] in { + def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>; + defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs, + v128g, v128db, 3, 0>; + defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag, + v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs, + v128f, v128sb, 2, 0>; + defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag, + v32f, v32sb, 2, 8>; + defm WFCHXB : BinaryVRRcSPair<"wfchxb", 0xE7EB, null_frag, null_frag, + v128q, v128xb, 4, 8>; + } } // Compare and signal high. - let Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in { defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, null_frag, null_frag, v128g, v128db, 3, 4>; defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag, @@ -1280,22 +1324,24 @@ } // Compare high or equal. 
- def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>; - defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes, - v128g, v128db, 3, 0>; - defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag, - v64g, v64db, 3, 8>; - let Predicates = [FeatureVectorEnhancements1] in { - defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes, - v128f, v128sb, 2, 0>; - defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag, - v32f, v32sb, 2, 8>; - defm WFCHEXB : BinaryVRRcSPair<"wfchexb", 0xE7EA, null_frag, null_frag, - v128q, v128xb, 4, 8>; + let Uses = [FPC] in { + def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>; + defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes, + v128g, v128db, 3, 0>; + defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag, + v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes, + v128f, v128sb, 2, 0>; + defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag, + v32f, v32sb, 2, 8>; + defm WFCHEXB : BinaryVRRcSPair<"wfchexb", 0xE7EA, null_frag, null_frag, + v128q, v128xb, 4, 8>; + } } // Compare and signal high or equal. - let Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in { defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, null_frag, null_frag, v128g, v128db, 3, 4>; defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag, Index: lib/Target/SystemZ/SystemZOperators.td =================================================================== --- lib/Target/SystemZ/SystemZOperators.td +++ lib/Target/SystemZ/SystemZOperators.td @@ -647,21 +647,21 @@ (add (mul node:$src1, node:$src2), node:$src3)>; // Fused multiply-subtract, using the natural operand order. 
-def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (fma node:$src1, node:$src2, (fneg node:$src3))>; +def any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (any_fma node:$src1, node:$src2, (fneg node:$src3))>; // Fused multiply-add and multiply-subtract, but with the order of the // operands matching SystemZ's MA and MS instructions. -def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (fma node:$src2, node:$src3, node:$src1)>; -def z_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (fma node:$src2, node:$src3, (fneg node:$src1))>; +def z_any_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (any_fma node:$src2, node:$src3, node:$src1)>; +def z_any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (any_fma node:$src2, node:$src3, (fneg node:$src1))>; // Negative fused multiply-add and multiply-subtract. -def fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (fneg (fma node:$src1, node:$src2, node:$src3))>; -def fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (fneg (fms node:$src1, node:$src2, node:$src3))>; +def any_fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (fneg (any_fma node:$src1, node:$src2, node:$src3))>; +def any_fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (fneg (any_fms node:$src1, node:$src2, node:$src3))>; // Floating-point negative absolute. def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>; Index: lib/Target/SystemZ/SystemZRegisterInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -156,6 +156,9 @@ Reserved.set(SystemZ::A0); Reserved.set(SystemZ::A1); + // FPC is the floating-point control register. 
+ Reserved.set(SystemZ::FPC); + return Reserved; } Index: lib/Target/SystemZ/SystemZRegisterInfo.td =================================================================== --- lib/Target/SystemZ/SystemZRegisterInfo.td +++ lib/Target/SystemZ/SystemZRegisterInfo.td @@ -299,6 +299,13 @@ let isAllocatable = 0, CopyCost = -1 in def CCR : RegisterClass<"SystemZ", [i32], 32, (add CC)>; +// The floating-point control register. +// Note: We only model the current rounding modes and the IEEE masks. +// IEEE flags and DXC are not modeled here. +def FPC : SystemZReg<"fpc">; +let isAllocatable = 0 in + def FPCRegs : RegisterClass<"SystemZ", [i32], 32, (add FPC)>; + // Access registers. class ACR32 num, string n> : SystemZReg { let HWEncoding = num; Index: test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir =================================================================== --- test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir +++ test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir @@ -181,11 +181,11 @@ J %bb.3 bb.3: - WFCDB undef %46, %45, implicit-def $cc + WFCDB undef %46, %45, implicit-def $cc, implicit $fpc %48 = IPM implicit killed $cc %48 = AFIMux %48, 268435456, implicit-def dead $cc %6 = RISBMux undef %6, %48, 31, 159, 35 - WFCDB undef %50, %45, implicit-def $cc + WFCDB undef %50, %45, implicit-def $cc, implicit $fpc BRC 15, 6, %bb.1, implicit killed $cc J %bb.4 Index: test/CodeGen/SystemZ/clear-liverange-spillreg.mir =================================================================== --- test/CodeGen/SystemZ/clear-liverange-spillreg.mir +++ test/CodeGen/SystemZ/clear-liverange-spillreg.mir @@ -401,7 +401,7 @@ BRC 14, 6, %bb.29, implicit killed $cc bb.28: - %130 = CDFBR %60 + %130 = CDFBR %60, implicit $fpc J %bb.30 bb.29: Index: test/CodeGen/SystemZ/fp-cmp-07.mir =================================================================== --- test/CodeGen/SystemZ/fp-cmp-07.mir +++ test/CodeGen/SystemZ/fp-cmp-07.mir @@ -30,7 +30,7 @@ bb.0.entry: liveins: $f0s, $r2d - LTEBRCompare 
$f0s, $f0s, implicit-def $cc + LTEBRCompare $f0s, $f0s, implicit-def $cc, implicit $fpc $f2s = LER $f0s INLINEASM &"blah $0", 1, 9, $f2s CondReturn 15, 4, implicit $f0s, implicit $cc Index: test/CodeGen/SystemZ/fp-conv-17.mir =================================================================== --- test/CodeGen/SystemZ/fp-conv-17.mir +++ test/CodeGen/SystemZ/fp-conv-17.mir @@ -163,39 +163,39 @@ STE %16, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) STE %17, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) STE %18, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) - %19 = LDEBR %2 + %19 = LDEBR %2, implicit $fpc STD %19, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) - %20 = LDEBR %3 + %20 = LDEBR %3, implicit $fpc STD %20, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) - %21 = LDEBR %4 + %21 = LDEBR %4, implicit $fpc STD %21, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) - %22 = LDEBR %5 + %22 = LDEBR %5, implicit $fpc STD %22, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) - %23 = LDEBR %6 + %23 = LDEBR %6, implicit $fpc STD %23, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) - %24 = LDEBR %7 + %24 = LDEBR %7, implicit $fpc STD %24, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) - %25 = LDEBR %8 + %25 = LDEBR %8, implicit $fpc STD %25, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) - %26 = LDEBR %9 + %26 = LDEBR %9, implicit $fpc STD %26, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) - %27 = LDEBR %10 + %27 = LDEBR %10, implicit $fpc STD %27, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) - %28 = LDEBR %11 + %28 = LDEBR %11, implicit $fpc STD %28, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) - %29 = LDEBR %12 + %29 = LDEBR %12, implicit $fpc STD %29, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) - %30 = LDEBR %13 + %30 = LDEBR %13, implicit $fpc STD %30, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) - %31 = LDEBR %14 + %31 = LDEBR %14, implicit $fpc STD %31, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) - %32 = 
LDEBR %15 + %32 = LDEBR %15, implicit $fpc STD %32, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) - %33 = LDEBR %16 + %33 = LDEBR %16, implicit $fpc STD %33, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) - %34 = LDEBR %17 + %34 = LDEBR %17, implicit $fpc STD %34, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) - %35 = LDEBR %18 + %35 = LDEBR %18, implicit $fpc STD %35, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) Return Index: test/CodeGen/SystemZ/fp-strict-add-01.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-add-01.ll +++ test/CodeGen/SystemZ/fp-strict-add-01.ll @@ -0,0 +1,173 @@ +; Test 32-bit floating-point strict addition. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare float @foo() +declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) + +; Check register addition. +define float @f1(float %f1, float %f2) { +; CHECK-LABEL: f1: +; CHECK: aebr %f0, %f2 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.fadd.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check the low end of the AEB range. +define float @f2(float %f1, float *%ptr) { +; CHECK-LABEL: f2: +; CHECK: aeb %f0, 0(%r2) +; CHECK: br %r14 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fadd.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check the high end of the aligned AEB range. 
+define float @f3(float %f1, float *%base) { +; CHECK-LABEL: f3: +; CHECK: aeb %f0, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1023 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fadd.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define float @f4(float %f1, float *%base) { +; CHECK-LABEL: f4: +; CHECK: aghi %r2, 4096 +; CHECK: aeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1024 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fadd.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check negative displacements, which also need separate address logic. +define float @f5(float %f1, float *%base) { +; CHECK-LABEL: f5: +; CHECK: aghi %r2, -4 +; CHECK: aeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 -1 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fadd.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check that AEB allows indices. +define float @f6(float %f1, float *%base, i64 %index) { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r3, 2 +; CHECK: aeb %f0, 400(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr float, float *%base, i64 %index + %ptr2 = getelementptr float, float *%ptr1, i64 100 + %f2 = load float, float *%ptr2 + %res = call float @llvm.experimental.constrained.fadd.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check that additions of spilled values can use AEB rather than AEBR. 
+define float @f7(float *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK-SCALAR: aeb %f0, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr float, float *%ptr0, i64 2 + %ptr2 = getelementptr float, float *%ptr0, i64 4 + %ptr3 = getelementptr float, float *%ptr0, i64 6 + %ptr4 = getelementptr float, float *%ptr0, i64 8 + %ptr5 = getelementptr float, float *%ptr0, i64 10 + %ptr6 = getelementptr float, float *%ptr0, i64 12 + %ptr7 = getelementptr float, float *%ptr0, i64 14 + %ptr8 = getelementptr float, float *%ptr0, i64 16 + %ptr9 = getelementptr float, float *%ptr0, i64 18 + %ptr10 = getelementptr float, float *%ptr0, i64 20 + + %val0 = load float, float *%ptr0 + %val1 = load float, float *%ptr1 + %val2 = load float, float *%ptr2 + %val3 = load float, float *%ptr3 + %val4 = load float, float *%ptr4 + %val5 = load float, float *%ptr5 + %val6 = load float, float *%ptr6 + %val7 = load float, float *%ptr7 + %val8 = load float, float *%ptr8 + %val9 = load float, float *%ptr9 + %val10 = load float, float *%ptr10 + + %ret = call float @foo() + + %add0 = call float @llvm.experimental.constrained.fadd.f32( + float %ret, float %val0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add1 = call float @llvm.experimental.constrained.fadd.f32( + float %add0, float %val1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add2 = call float @llvm.experimental.constrained.fadd.f32( + float %add1, float %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add3 = call float @llvm.experimental.constrained.fadd.f32( + float %add2, float %val3, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add4 = call float @llvm.experimental.constrained.fadd.f32( + float %add3, float %val4, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add5 = call float @llvm.experimental.constrained.fadd.f32( + float %add4, float %val5, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add6 = call 
float @llvm.experimental.constrained.fadd.f32( + float %add5, float %val6, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add7 = call float @llvm.experimental.constrained.fadd.f32( + float %add6, float %val7, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add8 = call float @llvm.experimental.constrained.fadd.f32( + float %add7, float %val8, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add9 = call float @llvm.experimental.constrained.fadd.f32( + float %add8, float %val9, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add10 = call float @llvm.experimental.constrained.fadd.f32( + float %add9, float %val10, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + ret float %add10 +} Index: utils/TableGen/CodeGenDAGPatterns.h =================================================================== --- utils/TableGen/CodeGenDAGPatterns.h +++ utils/TableGen/CodeGenDAGPatterns.h @@ -336,6 +336,7 @@ TreePattern &TP; unsigned ForceMode; // Mode to use when set. bool CodeGen = false; // Set during generation of matcher code. + bool Validate = true; // Indicate whether to validate types. private: TypeSetByHwMode getLegalTypes(); Index: utils/TableGen/CodeGenDAGPatterns.cpp =================================================================== --- utils/TableGen/CodeGenDAGPatterns.cpp +++ utils/TableGen/CodeGenDAGPatterns.cpp @@ -808,7 +808,7 @@ #ifndef NDEBUG TypeInfer::ValidateOnExit::~ValidateOnExit() { - if (!VTS.validate()) { + if (Infer.Validate && !VTS.validate()) { dbgs() << "Type set is empty for each HW mode:\n" "possible type contradiction in the pattern below " "(use -print-records with llvm-tblgen to see all " @@ -3018,8 +3018,13 @@ ThePat.InlinePatternFragments(); // Infer as many types as possible. Don't worry about it if we don't infer - // all of them, some may depend on the inputs of the pattern. + // all of them, some may depend on the inputs of the pattern. 
Also, don't + // validate type sets; validation may cause spurious failures e.g. if a + // fragment needs floating-point types but the current target does not have + // any (this is only an error if that fragment is ever used!). + ThePat.getInfer().Validate = false; ThePat.InferAllTypes(); + ThePat.getInfer().Validate = true; ThePat.resetError(); // If debugging, print out the pattern fragment result. Index: utils/TableGen/CodeGenInstruction.h =================================================================== --- utils/TableGen/CodeGenInstruction.h +++ utils/TableGen/CodeGenInstruction.h @@ -237,6 +237,7 @@ bool mayLoad_Unset : 1; bool mayStore : 1; bool mayStore_Unset : 1; + bool mayAccessMemory : 1; bool isPredicable : 1; bool isConvertibleToThreeAddress : 1; bool isCommutable : 1; Index: utils/TableGen/CodeGenInstruction.cpp =================================================================== --- utils/TableGen/CodeGenInstruction.cpp +++ utils/TableGen/CodeGenInstruction.cpp @@ -335,6 +335,7 @@ mayLoad_Unset = Unset; mayStore = R->getValueAsBitOrUnset("mayStore", Unset); mayStore_Unset = Unset; + mayAccessMemory = R->getValueAsBit("mayAccessMemory"); hasSideEffects = R->getValueAsBitOrUnset("hasSideEffects", Unset); hasSideEffects_Unset = Unset; Index: utils/TableGen/DAGISelMatcherGen.cpp =================================================================== --- utils/TableGen/DAGISelMatcherGen.cpp +++ utils/TableGen/DAGISelMatcherGen.cpp @@ -689,7 +689,7 @@ Record *Op = N->getOperator(); const CodeGenTarget &CGT = CGP.getTargetInfo(); CodeGenInstruction &II = CGT.getInstruction(Op); - return II.mayLoad || II.mayStore; + return II.mayLoad || II.mayStore || II.mayAccessMemory; } static unsigned @@ -734,10 +734,14 @@ // Instructions which load and store from memory should have a chain, // regardless of whether they happen to have an internal pattern saying so. 
- if (Pattern.getSrcPattern()->TreeHasProperty(SDNPHasChain, CGP) - && (II.hasCtrlDep || II.mayLoad || II.mayStore || II.canFoldAsLoad || - II.hasSideEffects)) - NodeHasChain = true; + if (II.hasCtrlDep || II.mayLoad || II.mayStore || II.canFoldAsLoad || + II.hasSideEffects) + NodeHasChain = true; + + // However, if the source pattern doesn't even have a chain, we never need + // to emit a chain to the output node either. + if (!Pattern.getSrcPattern()->TreeHasProperty(SDNPHasChain, CGP)) + NodeHasChain = false; bool isRoot = N == Pattern.getDstPattern(); Index: utils/TableGen/InstrInfoEmitter.cpp =================================================================== --- utils/TableGen/InstrInfoEmitter.cpp +++ utils/TableGen/InstrInfoEmitter.cpp @@ -583,6 +583,7 @@ if (Inst.canFoldAsLoad) OS << "|(1ULL<