Index: include/llvm/CodeGen/MachineInstr.h =================================================================== --- include/llvm/CodeGen/MachineInstr.h +++ include/llvm/CodeGen/MachineInstr.h @@ -102,8 +102,10 @@ // no unsigned wrap. NoSWrap = 1 << 12, // Instruction supports binary operator // no signed wrap. - IsExact = 1 << 13 // Instruction supports division is + IsExact = 1 << 13, // Instruction supports division is // known to be exact. + FPExcept = 1 << 14, // Instruction may raise floating-point + // exceptions. }; private: @@ -830,6 +832,17 @@ return mayLoad(Type) || mayStore(Type); } + /// Return true if this instruction could possibly raise a floating-point + /// exception. This is the case if the instruction is a floating-point + /// instruction that can in principle raise an exception, as indicated + /// by the MCID::MayRaiseFPException property, *and* at the same time, + /// the instruction is used in a context where we expect floating-point + /// exceptions might be enabled, as indicated by the FPExcept MI flag. + bool mayRaiseFPException() const { + return hasProperty(MCID::MayRaiseFPException) && + getFlag(MachineInstr::MIFlag::FPExcept); + } + //===--------------------------------------------------------------------===// // Flags that indicate whether an instruction can be modified by a method. //===--------------------------------------------------------------------===// Index: include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- include/llvm/CodeGen/SelectionDAGNodes.h +++ include/llvm/CodeGen/SelectionDAGNodes.h @@ -368,6 +368,13 @@ bool ApproximateFuncs : 1; bool AllowReassociation : 1; + // We assume instructions do not raise floating-point exceptions by default, + // and only those marked explicitly may do so. We could choose to represent + // this via a positive "FPExcept" flag like on the MI level, but having a + // negative "NoExcept" flag here (that defaults to true) makes the flag + // intersection logic more straightforward. + bool NoExcept : 1; + public: /// Default constructor turns off all optimization flags. SDNodeFlags() @@ -375,7 +382,7 @@ Exact(false), NoNaNs(false), NoInfs(false), NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false), AllowContract(false), ApproximateFuncs(false), - AllowReassociation(false) {} + AllowReassociation(false), NoExcept(true) {} /// Propagate the fast-math-flags from an IR FPMathOperator. void copyFMF(const FPMathOperator &FPMO) { @@ -438,6 +445,10 @@ setDefined(); AllowReassociation = b; } + void setFPExcept(bool b) { + setDefined(); + NoExcept = !b; + } // These are accessors for each flag.
bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } @@ -451,9 +462,10 @@ bool hasAllowContract() const { return AllowContract; } bool hasApproximateFuncs() const { return ApproximateFuncs; } bool hasAllowReassociation() const { return AllowReassociation; } + bool hasFPExcept() const { return !NoExcept; } bool isFast() const { - return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs && + return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs && NoExcept && AllowContract && ApproximateFuncs && AllowReassociation; } @@ -473,6 +485,7 @@ AllowContract &= Flags.AllowContract; ApproximateFuncs &= Flags.ApproximateFuncs; AllowReassociation &= Flags.AllowReassociation; + NoExcept &= Flags.NoExcept; } }; Index: include/llvm/MC/MCInstrDesc.h =================================================================== --- include/llvm/MC/MCInstrDesc.h +++ include/llvm/MC/MCInstrDesc.h @@ -134,6 +134,7 @@ FoldableAsLoad, MayLoad, MayStore, + MayRaiseFPException, Predicable, NotDuplicable, UnmodeledSideEffects, @@ -403,6 +404,11 @@ /// may not actually modify anything, for example. bool mayStore() const { return Flags & (1ULL << MCID::MayStore); } + /// Return true if this instruction may raise a floating-point exception. + bool mayRaiseFPException() const { + return Flags & (1ULL << MCID::MayRaiseFPException); + } + /// Return true if this instruction has side /// effects that are not modeled by other flags. This does not return true /// for instructions whose effects are captured by: Index: include/llvm/Target/Target.td =================================================================== --- include/llvm/Target/Target.td +++ include/llvm/Target/Target.td @@ -456,6 +456,7 @@ bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand? bit mayLoad = ?; // Is it possible for this inst to read memory? bit mayStore = ?; // Is it possible for this inst to write memory? + bit mayRaiseFPException = 0; // Can this raise a floating-point exception? bit isConvertibleToThreeAddress = 0; // Can this 2-addr instruction promote? bit isCommutable = 0; // Is this 3 operand instruction commutable? bit isTerminator = 0; // Is this part of the terminator for a basic block? 
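[Reviewer note, not part of the patch itself.] To make the intended use of the new property concrete, here is a minimal sketch of how a target-independent pass is expected to consult it before moving or deleting an instruction. The helper name isSafeToMoveInstr is purely illustrative; the real call sites updated below (GlobalISel's InstructionSelector, ImplicitNullChecks, MachineCSE, MachinePipeliner, TargetInstrInfo, etc.) follow this same pattern of adding mayRaiseFPException() alongside the existing side-effect checks.

    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    // Conservatively refuse to move any instruction that may raise a
    // floating-point exception.  MachineInstr::mayRaiseFPException() only
    // returns true when the opcode carries the MCID::MayRaiseFPException
    // property (from the mayRaiseFPException bit in the .td file) *and* the
    // instruction itself carries the FPExcept MI flag set during selection.
    static bool isSafeToMoveInstr(const MachineInstr &MI) {
      return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
             !MI.hasUnmodeledSideEffects();
    }
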
Index: include/llvm/Target/TargetSelectionDAG.td =================================================================== --- include/llvm/Target/TargetSelectionDAG.td +++ include/llvm/Target/TargetSelectionDAG.td @@ -461,6 +461,49 @@ def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>; def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>; +def strict_fadd : SDNode<"ISD::STRICT_FADD", + SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>; +def strict_fsub : SDNode<"ISD::STRICT_FSUB", + SDTFPBinOp, [SDNPHasChain]>; +def strict_fmul : SDNode<"ISD::STRICT_FMUL", + SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>; +def strict_fdiv : SDNode<"ISD::STRICT_FDIV", + SDTFPBinOp, [SDNPHasChain]>; +def strict_frem : SDNode<"ISD::STRICT_FREM", + SDTFPBinOp, [SDNPHasChain]>; +def strict_fma : SDNode<"ISD::STRICT_FMA", + SDTFPTernaryOp, [SDNPHasChain]>; +def strict_fsqrt : SDNode<"ISD::STRICT_FSQRT", + SDTFPUnaryOp, [SDNPHasChain]>; +def strict_fsin : SDNode<"ISD::STRICT_FSIN", + SDTFPUnaryOp, [SDNPHasChain]>; +def strict_fcos : SDNode<"ISD::STRICT_FCOS", + SDTFPUnaryOp, [SDNPHasChain]>; +def strict_fexp2 : SDNode<"ISD::STRICT_FEXP2", + SDTFPUnaryOp, [SDNPHasChain]>; +def strict_fpow : SDNode<"ISD::STRICT_FPOW", + SDTFPBinOp, [SDNPHasChain]>; +def strict_flog2 : SDNode<"ISD::STRICT_FLOG2", + SDTFPUnaryOp, [SDNPHasChain]>; +def strict_frint : SDNode<"ISD::STRICT_FRINT", + SDTFPUnaryOp, [SDNPHasChain]>; +def strict_fnearbyint : SDNode<"ISD::STRICT_FNEARBYINT", + SDTFPUnaryOp, [SDNPHasChain]>; +def strict_fceil : SDNode<"ISD::STRICT_FCEIL", + SDTFPUnaryOp, [SDNPHasChain]>; +def strict_ffloor : SDNode<"ISD::STRICT_FFLOOR", + SDTFPUnaryOp, [SDNPHasChain]>; +def strict_fround : SDNode<"ISD::STRICT_FROUND", + SDTFPUnaryOp, [SDNPHasChain]>; +def strict_ftrunc : SDNode<"ISD::STRICT_FTRUNC", + SDTFPUnaryOp, [SDNPHasChain]>; +def strict_fminnum : SDNode<"ISD::STRICT_FMINNUM", + SDTFPBinOp, [SDNPHasChain, + SDNPCommutative, SDNPAssociative]>; +def strict_fmaxnum : SDNode<"ISD::STRICT_FMAXNUM", + SDTFPBinOp, [SDNPHasChain, + SDNPCommutative, SDNPAssociative]>; + def setcc : SDNode<"ISD::SETCC" , SDTSetCC>; def select : SDNode<"ISD::SELECT" , SDTSelect>; def vselect : SDNode<"ISD::VSELECT" , SDTVSelect>; @@ -1171,6 +1214,68 @@ def setne : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETNE)>; +// Convenience fragments to match both strict and non-strict fp operations +def any_fadd : PatFrags<(ops node:$lhs, node:$rhs), + [(strict_fadd node:$lhs, node:$rhs), + (fadd node:$lhs, node:$rhs)]>; +def any_fsub : PatFrags<(ops node:$lhs, node:$rhs), + [(strict_fsub node:$lhs, node:$rhs), + (fsub node:$lhs, node:$rhs)]>; +def any_fmul : PatFrags<(ops node:$lhs, node:$rhs), + [(strict_fmul node:$lhs, node:$rhs), + (fmul node:$lhs, node:$rhs)]>; +def any_fdiv : PatFrags<(ops node:$lhs, node:$rhs), + [(strict_fdiv node:$lhs, node:$rhs), + (fdiv node:$lhs, node:$rhs)]>; +def any_frem : PatFrags<(ops node:$lhs, node:$rhs), + [(strict_frem node:$lhs, node:$rhs), + (frem node:$lhs, node:$rhs)]>; +def any_fma : PatFrags<(ops node:$src1, node:$src2, node:$src3), + [(strict_fma node:$src1, node:$src2, node:$src3), + (fma node:$src1, node:$src2, node:$src3)]>; +def any_fsqrt : PatFrags<(ops node:$src), + [(strict_fsqrt node:$src), + (fsqrt node:$src)]>; +def any_fsin : PatFrags<(ops node:$src), + [(strict_fsin node:$src), + (fsin node:$src)]>; +def any_fcos : PatFrags<(ops node:$src), + [(strict_fcos node:$src), + (fcos node:$src)]>; +def any_fexp2 : PatFrags<(ops node:$src), + [(strict_fexp2 node:$src), + (fexp2 
node:$src)]>; +def any_fpow : PatFrags<(ops node:$lhs, node:$rhs), + [(strict_fpow node:$lhs, node:$rhs), + (fpow node:$lhs, node:$rhs)]>; +def any_flog2 : PatFrags<(ops node:$src), + [(strict_flog2 node:$src), + (flog2 node:$src)]>; +def any_frint : PatFrags<(ops node:$src), + [(strict_frint node:$src), + (frint node:$src)]>; +def any_fnearbyint : PatFrags<(ops node:$src), + [(strict_fnearbyint node:$src), + (fnearbyint node:$src)]>; +def any_fceil : PatFrags<(ops node:$src), + [(strict_fceil node:$src), + (fceil node:$src)]>; +def any_ffloor : PatFrags<(ops node:$src), + [(strict_ffloor node:$src), + (ffloor node:$src)]>; +def any_fround : PatFrags<(ops node:$src), + [(strict_fround node:$src), + (fround node:$src)]>; +def any_ftrunc : PatFrags<(ops node:$src), + [(strict_ftrunc node:$src), + (ftrunc node:$src)]>; +def any_fmaxnum : PatFrags<(ops node:$lhs, node:$rhs), + [(strict_fmaxnum node:$lhs, node:$rhs), + (fmaxnum node:$lhs, node:$rhs)]>; +def any_fminnum : PatFrags<(ops node:$lhs, node:$rhs), + [(strict_fminnum node:$lhs, node:$rhs), + (fminnum node:$lhs, node:$rhs)]>; + multiclass binary_atomic_op_ord { def #NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val), (!cast(#NAME) node:$ptr, node:$val)> { Index: lib/CodeGen/GlobalISel/InstructionSelector.cpp =================================================================== --- lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -78,6 +78,6 @@ std::next(MI.getIterator()) == IntoMI.getIterator()) return true; - return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() && - empty(MI.implicit_operands()); + return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() && + !MI.hasUnmodeledSideEffects() && empty(MI.implicit_operands()); } Index: lib/CodeGen/ImplicitNullChecks.cpp =================================================================== --- lib/CodeGen/ImplicitNullChecks.cpp +++ lib/CodeGen/ImplicitNullChecks.cpp @@ -229,7 +229,8 @@ } // end anonymous namespace bool ImplicitNullChecks::canHandle(const MachineInstr *MI) { - if (MI->isCall() || MI->hasUnmodeledSideEffects()) + if (MI->isCall() || MI->mayRaiseFPException() || + MI->hasUnmodeledSideEffects()) return false; auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); }; (void)IsRegMask; Index: lib/CodeGen/MIRParser/MILexer.h =================================================================== --- lib/CodeGen/MIRParser/MILexer.h +++ lib/CodeGen/MIRParser/MILexer.h @@ -73,6 +73,7 @@ kw_nuw, kw_nsw, kw_exact, + kw_fpexcept, kw_debug_location, kw_cfi_same_value, kw_cfi_offset, Index: lib/CodeGen/MIRParser/MILexer.cpp =================================================================== --- lib/CodeGen/MIRParser/MILexer.cpp +++ lib/CodeGen/MIRParser/MILexer.cpp @@ -204,6 +204,7 @@ .Case("nuw" , MIToken::kw_nuw) .Case("nsw" , MIToken::kw_nsw) .Case("exact" , MIToken::kw_exact) + .Case("fpexcept", MIToken::kw_fpexcept) .Case("debug-location", MIToken::kw_debug_location) .Case("same_value", MIToken::kw_cfi_same_value) .Case("offset", MIToken::kw_cfi_offset) Index: lib/CodeGen/MIRParser/MIParser.cpp =================================================================== --- lib/CodeGen/MIRParser/MIParser.cpp +++ lib/CodeGen/MIRParser/MIParser.cpp @@ -1136,7 +1136,8 @@ Token.is(MIToken::kw_reassoc) || Token.is(MIToken::kw_nuw) || Token.is(MIToken::kw_nsw) || - Token.is(MIToken::kw_exact)) { + Token.is(MIToken::kw_exact) || + Token.is(MIToken::kw_fpexcept)) { // Mine frame and fast math flags if 
(Token.is(MIToken::kw_frame_setup)) Flags |= MachineInstr::FrameSetup; @@ -1162,6 +1163,8 @@ Flags |= MachineInstr::NoSWrap; if (Token.is(MIToken::kw_exact)) Flags |= MachineInstr::IsExact; + if (Token.is(MIToken::kw_fpexcept)) + Flags |= MachineInstr::FPExcept; lex(); } Index: lib/CodeGen/MIRPrinter.cpp =================================================================== --- lib/CodeGen/MIRPrinter.cpp +++ lib/CodeGen/MIRPrinter.cpp @@ -710,6 +710,8 @@ OS << "nsw "; if (MI.getFlag(MachineInstr::IsExact)) OS << "exact "; + if (MI.getFlag(MachineInstr::FPExcept)) + OS << "fpexcept "; OS << TII->getName(MI.getOpcode()); if (I < E) Index: lib/CodeGen/MachineCSE.cpp =================================================================== --- lib/CodeGen/MachineCSE.cpp +++ lib/CodeGen/MachineCSE.cpp @@ -382,7 +382,7 @@ // Ignore stuff that we obviously can't move. if (MI->mayStore() || MI->isCall() || MI->isTerminator() || - MI->hasUnmodeledSideEffects()) + MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects()) return false; if (MI->mayLoad()) { Index: lib/CodeGen/MachineInstr.cpp =================================================================== --- lib/CodeGen/MachineInstr.cpp +++ lib/CodeGen/MachineInstr.cpp @@ -1178,7 +1178,7 @@ } if (isPosition() || isDebugInstr() || isTerminator() || - hasUnmodeledSideEffects()) + mayRaiseFPException() || hasUnmodeledSideEffects()) return false; // See if this instruction does a load. If so, we have to guarantee that the @@ -1544,6 +1544,8 @@ OS << "nsw "; if (getFlag(MachineInstr::IsExact)) OS << "exact "; + if (getFlag(MachineInstr::FPExcept)) + OS << "fpexcept "; // Print the opcode name. if (TII) Index: lib/CodeGen/MachinePipeliner.cpp =================================================================== --- lib/CodeGen/MachinePipeliner.cpp +++ lib/CodeGen/MachinePipeliner.cpp @@ -533,7 +533,8 @@ /// Return true if the instruction causes a chain between memory /// references before and after it. static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) { - return MI.isCall() || MI.hasUnmodeledSideEffects() || + return MI.isCall() || MI.mayRaiseFPException() || + MI.hasUnmodeledSideEffects() || (MI.hasOrderedMemoryRef() && (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA))); } @@ -3128,6 +3129,7 @@ // Assume ordered loads and stores may have a loop carried dependence. if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() || + SI->mayRaiseFPException() || DI->mayRaiseFPException() || SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef()) return true; Index: lib/CodeGen/PeepholeOptimizer.cpp =================================================================== --- lib/CodeGen/PeepholeOptimizer.cpp +++ lib/CodeGen/PeepholeOptimizer.cpp @@ -1825,7 +1825,7 @@ assert(Def->isBitcast() && "Invalid definition"); // Bail if there are effects that a plain copy will not expose. - if (Def->hasUnmodeledSideEffects()) + if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects()) return ValueTrackerResult(); // Bitcasts with more than one def are not supported. Index: lib/CodeGen/ScheduleDAGInstrs.cpp =================================================================== --- lib/CodeGen/ScheduleDAGInstrs.cpp +++ lib/CodeGen/ScheduleDAGInstrs.cpp @@ -712,6 +712,7 @@ AAForDep = UseAA ? 
AA : nullptr; BarrierChain = nullptr; + SUnit *FPBarrierChain = nullptr; this->TrackLaneMasks = TrackLaneMasks; MISUnitMap.clear(); @@ -871,9 +872,21 @@ addBarrierChain(NonAliasStores); addBarrierChain(NonAliasLoads); + // Add dependency against previous FP barrier and reset FP barrier. + if (FPBarrierChain) + FPBarrierChain->addPredBarrier(BarrierChain); + FPBarrierChain = BarrierChain; + continue; } + // Instructions that may raise FP exceptions depend on each other. + if (MI.mayRaiseFPException()) { + if (FPBarrierChain) + FPBarrierChain->addPredBarrier(SU); + FPBarrierChain = SU; + } + // If it's not a store or a variant load, we're done. if (!MI.mayStore() && !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA))) Index: lib/CodeGen/SelectionDAG/InstrEmitter.cpp =================================================================== --- lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -899,6 +899,9 @@ if (Flags.hasExact()) MI->setFlag(MachineInstr::MIFlag::IsExact); + + if (Flags.hasFPExcept()) + MI->setFlag(MachineInstr::MIFlag::FPExcept); } // Emit all of the actual operands of this instruction, adding them to the Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6910,6 +6910,13 @@ { Chain, getValue(FPI.getArgOperand(0)), getValue(FPI.getArgOperand(1)) }); + if (FPI.getExceptionBehavior() != + ConstrainedFPIntrinsic::ExceptionBehavior::ebIgnore) { + SDNodeFlags Flags; + Flags.setFPExcept(true); + Result->setFlags(Flags); + } + assert(Result.getNode()->getNumValues() == 2); SDValue OutChain = Result.getValue(1); DAG.setRoot(OutChain); Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1118,16 +1118,14 @@ #endif // When we are using non-default rounding modes or FP exception behavior - // FP operations are represented by StrictFP pseudo-operations. They - // need to be simplified here so that the target-specific instruction - // selectors know how to handle them. - // - // If the current node is a strict FP pseudo-op, the isStrictFPOp() - // function will provide the corresponding normal FP opcode to which the - // node should be mutated. - // - // FIXME: The backends need a way to handle FP constraints. - if (Node->isStrictFPOpcode()) + // FP operations are represented by StrictFP pseudo-operations. For + // targets that do not (yet) understand strict FP operations directly, + // we convert them to normal FP opcodes instead at this point. This + // will allow them to be handled by existing target-specific instruction + // selectors. + if (Node->isStrictFPOpcode() && + (TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0)) + != TargetLowering::Legal)) Node = CurDAG->mutateStrictFPToFP(Node); LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: "; Index: lib/CodeGen/TargetInstrInfo.cpp =================================================================== --- lib/CodeGen/TargetInstrInfo.cpp +++ lib/CodeGen/TargetInstrInfo.cpp @@ -897,7 +897,8 @@ return true; // Avoid instructions obviously unsafe for remat. 
- if (MI.isNotDuplicable() || MI.mayStore() || MI.hasUnmodeledSideEffects()) + if (MI.isNotDuplicable() || MI.mayStore() || MI.mayRaiseFPException() || + MI.hasUnmodeledSideEffects()) return false; // Don't remat inline asm. We have no idea how expensive it is Index: lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- lib/CodeGen/TargetLoweringBase.cpp +++ lib/CodeGen/TargetLoweringBase.cpp @@ -662,6 +662,32 @@ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand); } + // Constrained floating-point operations default to expand. + setOperationAction(ISD::STRICT_FADD, VT, Expand); + setOperationAction(ISD::STRICT_FSUB, VT, Expand); + setOperationAction(ISD::STRICT_FMUL, VT, Expand); + setOperationAction(ISD::STRICT_FDIV, VT, Expand); + setOperationAction(ISD::STRICT_FREM, VT, Expand); + setOperationAction(ISD::STRICT_FMA, VT, Expand); + setOperationAction(ISD::STRICT_FSQRT, VT, Expand); + setOperationAction(ISD::STRICT_FPOW, VT, Expand); + setOperationAction(ISD::STRICT_FPOWI, VT, Expand); + setOperationAction(ISD::STRICT_FSIN, VT, Expand); + setOperationAction(ISD::STRICT_FCOS, VT, Expand); + setOperationAction(ISD::STRICT_FEXP, VT, Expand); + setOperationAction(ISD::STRICT_FEXP2, VT, Expand); + setOperationAction(ISD::STRICT_FLOG, VT, Expand); + setOperationAction(ISD::STRICT_FLOG10, VT, Expand); + setOperationAction(ISD::STRICT_FLOG2, VT, Expand); + setOperationAction(ISD::STRICT_FRINT, VT, Expand); + setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand); + setOperationAction(ISD::STRICT_FCEIL, VT, Expand); + setOperationAction(ISD::STRICT_FFLOOR, VT, Expand); + setOperationAction(ISD::STRICT_FROUND, VT, Expand); + setOperationAction(ISD::STRICT_FTRUNC, VT, Expand); + setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand); + setOperationAction(ISD::STRICT_FMINNUM, VT, Expand); + // For most targets @llvm.get.dynamic.area.offset just returns 0. setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand); Index: lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.cpp +++ lib/Target/SystemZ/SystemZISelLowering.cpp @@ -401,6 +401,22 @@ setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); + + // Handle constrained floating-point operations. + setOperationAction(ISD::STRICT_FADD, VT, Legal); + setOperationAction(ISD::STRICT_FSUB, VT, Legal); + setOperationAction(ISD::STRICT_FMUL, VT, Legal); + setOperationAction(ISD::STRICT_FDIV, VT, Legal); + setOperationAction(ISD::STRICT_FMA, VT, Legal); + setOperationAction(ISD::STRICT_FSQRT, VT, Legal); + setOperationAction(ISD::STRICT_FRINT, VT, Legal); + if (Subtarget.hasFPExtension()) { + setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); + setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); + setOperationAction(ISD::STRICT_FCEIL, VT, Legal); + setOperationAction(ISD::STRICT_FROUND, VT, Legal); + setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); + } } } @@ -432,6 +448,20 @@ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); setOperationAction(ISD::FROUND, MVT::v2f64, Legal); + + // Handle constrained floating-point operations. 
+ setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal); } // The vector enhancements facility 1 has instructions for these. @@ -475,6 +505,25 @@ setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal); setOperationAction(ISD::FMINNUM, MVT::f128, Legal); setOperationAction(ISD::FMINIMUM, MVT::f128, Legal); + + // Handle constrained floating-point operations. + setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal); + for (auto VT : { MVT::f32, MVT::f64, MVT::f128, + MVT::v4f32, MVT::v2f64 }) { + setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal); + setOperationAction(ISD::STRICT_FMINNUM, VT, Legal); + } } // We have fused multiply-addition for f32 and f64 but not f128. Index: lib/Target/SystemZ/SystemZInstrFP.td =================================================================== --- lib/Target/SystemZ/SystemZInstrFP.td +++ lib/Target/SystemZ/SystemZInstrFP.td @@ -52,7 +52,8 @@ // Moves between two floating-point registers that also set the condition // codes. -let Uses = [FPC], Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { +let Uses = [FPC], mayRaiseFPException = 1, + Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { defm LTEBR : LoadAndTestRRE<"ltebr", 0xB302, FP32>; defm LTDBR : LoadAndTestRRE<"ltdbr", 0xB312, FP64>; defm LTXBR : LoadAndTestRRE<"ltxbr", 0xB342, FP128>; @@ -68,7 +69,8 @@ // Use a normal load-and-test for compare against zero in case of // vector support (via a pseudo to simplify instruction selection). -let Uses = [FPC], Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { +let Uses = [FPC], mayRaiseFPException = 1, + Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { def LTEBRCompare_VecPseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>; def LTDBRCompare_VecPseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>; def LTXBRCompare_VecPseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>; @@ -173,7 +175,7 @@ // Convert floating-point values to narrower representations, rounding // according to the current mode. The destination of LEXBR and LDXBR // is a 128-bit value, but only the first register of the pair is used. 
-let Uses = [FPC] in { +let Uses = [FPC], mayRaiseFPException = 1 in { def LEDBR : UnaryRRE<"ledbr", 0xB344, fpround, FP32, FP64>; def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>; def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>; @@ -194,7 +196,7 @@ } // Extend register floating-point values to wider representations. -let Uses = [FPC] in { +let Uses = [FPC], mayRaiseFPException = 1 in { def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>; def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>; def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>; @@ -205,7 +207,7 @@ } // Extend memory floating-point values to wider representations. -let Uses = [FPC] in { +let Uses = [FPC], mayRaiseFPException = 1 in { def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>; def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>; def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>; @@ -218,7 +220,7 @@ } // Convert a signed integer register value to a floating-point one. -let Uses = [FPC] in { +let Uses = [FPC], mayRaiseFPException = 1 in { def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>; def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>; def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>; @@ -230,7 +232,7 @@ // The FP extension feature provides versions of the above that allow // specifying rounding mode and inexact-exception suppression flags. -let Uses = [FPC], Predicates = [FeatureFPExtension] in { +let Uses = [FPC], mayRaiseFPException = 1, Predicates = [FeatureFPExtension] in { def CEFBRA : TernaryRRFe<"cefbra", 0xB394, FP32, GR32>; def CDFBRA : TernaryRRFe<"cdfbra", 0xB395, FP64, GR32>; def CXFBRA : TernaryRRFe<"cxfbra", 0xB396, FP128, GR32>; @@ -242,7 +244,7 @@ // Convert am unsigned integer register value to a floating-point one. let Predicates = [FeatureFPExtension] in { - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def CELFBR : TernaryRRFe<"celfbr", 0xB390, FP32, GR32>; def CDLFBR : TernaryRRFe<"cdlfbr", 0xB391, FP64, GR32>; def CXLFBR : TernaryRRFe<"cxlfbr", 0xB392, FP128, GR32>; @@ -263,7 +265,7 @@ // Convert a floating-point register value to a signed integer value, // with the second operand (modifier M3) specifying the rounding mode. -let Uses = [FPC], Defs = [CC] in { +let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { def CFEBR : BinaryRRFe<"cfebr", 0xB398, GR32, FP32>; def CFDBR : BinaryRRFe<"cfdbr", 0xB399, GR32, FP64>; def CFXBR : BinaryRRFe<"cfxbr", 0xB39A, GR32, FP128>; @@ -284,7 +286,8 @@ // The FP extension feature provides versions of the above that allow // also specifying the inexact-exception suppression flag. -let Uses = [FPC], Predicates = [FeatureFPExtension], Defs = [CC] in { +let Uses = [FPC], mayRaiseFPException = 1, + Predicates = [FeatureFPExtension], Defs = [CC] in { def CFEBRA : TernaryRRFe<"cfebra", 0xB398, GR32, FP32>; def CFDBRA : TernaryRRFe<"cfdbra", 0xB399, GR32, FP64>; def CFXBRA : TernaryRRFe<"cfxbra", 0xB39A, GR32, FP128>; @@ -296,7 +299,7 @@ // Convert a floating-point register value to an unsigned integer value. let Predicates = [FeatureFPExtension] in { - let Uses = [FPC], Defs = [CC] in { + let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { def CLFEBR : TernaryRRFe<"clfebr", 0xB39C, GR32, FP32>; def CLFDBR : TernaryRRFe<"clfdbr", 0xB39D, GR32, FP64>; def CLFXBR : TernaryRRFe<"clfxbr", 0xB39E, GR32, FP128>; @@ -362,18 +365,18 @@ def LNDFR_32 : UnaryRRE<"lndfr", 0xB371, fnabs, FP32, FP32>; // Square root. 
-let Uses = [FPC] in { - def SQEBR : UnaryRRE<"sqebr", 0xB314, fsqrt, FP32, FP32>; - def SQDBR : UnaryRRE<"sqdbr", 0xB315, fsqrt, FP64, FP64>; - def SQXBR : UnaryRRE<"sqxbr", 0xB316, fsqrt, FP128, FP128>; +let Uses = [FPC], mayRaiseFPException = 1 in { + def SQEBR : UnaryRRE<"sqebr", 0xB314, any_fsqrt, FP32, FP32>; + def SQDBR : UnaryRRE<"sqdbr", 0xB315, any_fsqrt, FP64, FP64>; + def SQXBR : UnaryRRE<"sqxbr", 0xB316, any_fsqrt, FP128, FP128>; - def SQEB : UnaryRXE<"sqeb", 0xED14, loadu, FP32, 4>; - def SQDB : UnaryRXE<"sqdb", 0xED15, loadu, FP64, 8>; + def SQEB : UnaryRXE<"sqeb", 0xED14, loadu, FP32, 4>; + def SQDB : UnaryRXE<"sqdb", 0xED15, loadu, FP64, 8>; } // Round to an integer, with the second operand (modifier M3) specifying // the rounding mode. These forms always check for inexact conditions. -let Uses = [FPC] in { +let Uses = [FPC], mayRaiseFPException = 1 in { def FIEBR : BinaryRRFe<"fiebr", 0xB357, FP32, FP32>; def FIDBR : BinaryRRFe<"fidbr", 0xB35F, FP64, FP64>; def FIXBR : BinaryRRFe<"fixbr", 0xB347, FP128, FP128>; @@ -381,46 +384,46 @@ // frint rounds according to the current mode (modifier 0) and detects // inexact conditions. -def : Pat<(frint FP32:$src), (FIEBR 0, FP32:$src)>; -def : Pat<(frint FP64:$src), (FIDBR 0, FP64:$src)>; -def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>; +def : Pat<(any_frint FP32:$src), (FIEBR 0, FP32:$src)>; +def : Pat<(any_frint FP64:$src), (FIDBR 0, FP64:$src)>; +def : Pat<(any_frint FP128:$src), (FIXBR 0, FP128:$src)>; let Predicates = [FeatureFPExtension] in { // Extended forms of the FIxBR instructions. M4 can be set to 4 // to suppress detection of inexact conditions. - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def FIEBRA : TernaryRRFe<"fiebra", 0xB357, FP32, FP32>; def FIDBRA : TernaryRRFe<"fidbra", 0xB35F, FP64, FP64>; def FIXBRA : TernaryRRFe<"fixbra", 0xB347, FP128, FP128>; } // fnearbyint is like frint but does not detect inexact conditions. - def : Pat<(fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>; - def : Pat<(fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>; - def : Pat<(fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>; + def : Pat<(any_fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>; + def : Pat<(any_fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>; + def : Pat<(any_fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>; // floor is no longer allowed to raise an inexact condition, // so restrict it to the cases where the condition can be suppressed. // Mode 7 is round towards -inf. - def : Pat<(ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>; - def : Pat<(ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>; - def : Pat<(ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>; + def : Pat<(any_ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>; + def : Pat<(any_ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>; + def : Pat<(any_ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>; // Same idea for ceil, where mode 6 is round towards +inf. - def : Pat<(fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>; - def : Pat<(fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>; - def : Pat<(fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>; + def : Pat<(any_fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>; + def : Pat<(any_fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>; + def : Pat<(any_fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>; // Same idea for trunc, where mode 5 is round towards zero. 
- def : Pat<(ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>; - def : Pat<(ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>; - def : Pat<(ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>; + def : Pat<(any_ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>; + def : Pat<(any_ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>; + def : Pat<(any_ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>; // Same idea for round, where mode 1 is round towards nearest with // ties away from zero. - def : Pat<(fround FP32:$src), (FIEBRA 1, FP32:$src, 4)>; - def : Pat<(fround FP64:$src), (FIDBRA 1, FP64:$src, 4)>; - def : Pat<(fround FP128:$src), (FIXBRA 1, FP128:$src, 4)>; + def : Pat<(any_fround FP32:$src), (FIEBRA 1, FP32:$src, 4)>; + def : Pat<(any_fround FP64:$src), (FIDBRA 1, FP64:$src, 4)>; + def : Pat<(any_fround FP128:$src), (FIXBRA 1, FP128:$src, 4)>; } //===----------------------------------------------------------------------===// @@ -428,99 +431,102 @@ //===----------------------------------------------------------------------===// // Addition. -let Uses = [FPC], Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { +let Uses = [FPC], mayRaiseFPException = 1, + Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { let isCommutable = 1 in { - def AEBR : BinaryRRE<"aebr", 0xB30A, fadd, FP32, FP32>; - def ADBR : BinaryRRE<"adbr", 0xB31A, fadd, FP64, FP64>; - def AXBR : BinaryRRE<"axbr", 0xB34A, fadd, FP128, FP128>; + def AEBR : BinaryRRE<"aebr", 0xB30A, any_fadd, FP32, FP32>; + def ADBR : BinaryRRE<"adbr", 0xB31A, any_fadd, FP64, FP64>; + def AXBR : BinaryRRE<"axbr", 0xB34A, any_fadd, FP128, FP128>; } - def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load, 4>; - def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load, 8>; + def AEB : BinaryRXE<"aeb", 0xED0A, any_fadd, FP32, load, 4>; + def ADB : BinaryRXE<"adb", 0xED1A, any_fadd, FP64, load, 8>; } // Subtraction. -let Uses = [FPC], Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { - def SEBR : BinaryRRE<"sebr", 0xB30B, fsub, FP32, FP32>; - def SDBR : BinaryRRE<"sdbr", 0xB31B, fsub, FP64, FP64>; - def SXBR : BinaryRRE<"sxbr", 0xB34B, fsub, FP128, FP128>; +let Uses = [FPC], mayRaiseFPException = 1, + Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + def SEBR : BinaryRRE<"sebr", 0xB30B, any_fsub, FP32, FP32>; + def SDBR : BinaryRRE<"sdbr", 0xB31B, any_fsub, FP64, FP64>; + def SXBR : BinaryRRE<"sxbr", 0xB34B, any_fsub, FP128, FP128>; - def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load, 4>; - def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load, 8>; + def SEB : BinaryRXE<"seb", 0xED0B, any_fsub, FP32, load, 4>; + def SDB : BinaryRXE<"sdb", 0xED1B, any_fsub, FP64, load, 8>; } // Multiplication. -let Uses = [FPC] in { +let Uses = [FPC], mayRaiseFPException = 1 in { let isCommutable = 1 in { - def MEEBR : BinaryRRE<"meebr", 0xB317, fmul, FP32, FP32>; - def MDBR : BinaryRRE<"mdbr", 0xB31C, fmul, FP64, FP64>; - def MXBR : BinaryRRE<"mxbr", 0xB34C, fmul, FP128, FP128>; + def MEEBR : BinaryRRE<"meebr", 0xB317, any_fmul, FP32, FP32>; + def MDBR : BinaryRRE<"mdbr", 0xB31C, any_fmul, FP64, FP64>; + def MXBR : BinaryRRE<"mxbr", 0xB34C, any_fmul, FP128, FP128>; } - def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load, 4>; - def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>; + def MEEB : BinaryRXE<"meeb", 0xED17, any_fmul, FP32, load, 4>; + def MDB : BinaryRXE<"mdb", 0xED1C, any_fmul, FP64, load, 8>; } // f64 multiplication of two FP32 registers. 
-let Uses = [FPC] in +let Uses = [FPC], mayRaiseFPException = 1 in def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>; -def : Pat<(fmul (f64 (fpextend FP32:$src1)), (f64 (fpextend FP32:$src2))), +def : Pat<(any_fmul (f64 (fpextend FP32:$src1)), (f64 (fpextend FP32:$src2))), (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32), FP32:$src2)>; // f64 multiplication of an FP32 register and an f32 memory. -let Uses = [FPC] in +let Uses = [FPC], mayRaiseFPException = 1 in def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>; -def : Pat<(fmul (f64 (fpextend FP32:$src1)), - (f64 (extloadf32 bdxaddr12only:$addr))), +def : Pat<(any_fmul (f64 (fpextend FP32:$src1)), + (f64 (extloadf32 bdxaddr12only:$addr))), (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32), bdxaddr12only:$addr)>; // f128 multiplication of two FP64 registers. -let Uses = [FPC] in +let Uses = [FPC], mayRaiseFPException = 1 in def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>; let Predicates = [FeatureNoVectorEnhancements1] in - def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))), + def : Pat<(any_fmul (f128 (fpextend FP64:$src1)), + (f128 (fpextend FP64:$src2))), (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64), FP64:$src2)>; // f128 multiplication of an FP64 register and an f64 memory. -let Uses = [FPC] in +let Uses = [FPC], mayRaiseFPException = 1 in def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>; let Predicates = [FeatureNoVectorEnhancements1] in - def : Pat<(fmul (f128 (fpextend FP64:$src1)), - (f128 (extloadf64 bdxaddr12only:$addr))), + def : Pat<(any_fmul (f128 (fpextend FP64:$src1)), + (f128 (extloadf64 bdxaddr12only:$addr))), (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64), bdxaddr12only:$addr)>; // Fused multiply-add. -let Uses = [FPC] in { - def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>; - def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64, FP64>; +let Uses = [FPC], mayRaiseFPException = 1 in { + def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>; + def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>; - def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, FP32, load, 4>; - def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, FP64, load, 8>; + def MAEB : TernaryRXF<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>; + def MADB : TernaryRXF<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>; } // Fused multiply-subtract. -let Uses = [FPC] in { - def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32, FP32>; - def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64, FP64>; +let Uses = [FPC], mayRaiseFPException = 1 in { + def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>; + def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>; - def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, FP32, load, 4>; - def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, FP64, load, 8>; + def MSEB : TernaryRXF<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>; + def MSDB : TernaryRXF<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>; } // Division. 
-let Uses = [FPC] in { - def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>; - def DDBR : BinaryRRE<"ddbr", 0xB31D, fdiv, FP64, FP64>; - def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>; +let Uses = [FPC], mayRaiseFPException = 1 in { + def DEBR : BinaryRRE<"debr", 0xB30D, any_fdiv, FP32, FP32>; + def DDBR : BinaryRRE<"ddbr", 0xB31D, any_fdiv, FP64, FP64>; + def DXBR : BinaryRRE<"dxbr", 0xB34D, any_fdiv, FP128, FP128>; - def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load, 4>; - def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>; + def DEB : BinaryRXE<"deb", 0xED0D, any_fdiv, FP32, load, 4>; + def DDB : BinaryRXE<"ddb", 0xED1D, any_fdiv, FP64, load, 8>; } // Divide to integer. -let Uses = [FPC], Defs = [CC] in { +let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { def DIEBR : TernaryRRFb<"diebr", 0xB353, FP32, FP32, FP32>; def DIDBR : TernaryRRFb<"didbr", 0xB35B, FP64, FP64, FP64>; } @@ -529,7 +535,7 @@ // Comparisons //===----------------------------------------------------------------------===// -let Uses = [FPC], Defs = [CC], CCValues = 0xF in { +let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC], CCValues = 0xF in { def CEBR : CompareRRE<"cebr", 0xB309, z_fcmp, FP32, FP32>; def CDBR : CompareRRE<"cdbr", 0xB319, z_fcmp, FP64, FP64>; def CXBR : CompareRRE<"cxbr", 0xB349, z_fcmp, FP128, FP128>; @@ -570,7 +576,7 @@ } } - let Defs = [FPC] in { + let Defs = [FPC], mayRaiseFPException = 1 in { def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>; def LFAS : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>; } Index: lib/Target/SystemZ/SystemZInstrVector.td =================================================================== --- lib/Target/SystemZ/SystemZInstrVector.td +++ lib/Target/SystemZ/SystemZInstrVector.td @@ -924,29 +924,29 @@ // See comments in SystemZInstrFP.td for the suppression flags and // rounding modes. multiclass VectorRounding { - def : FPConversion; - def : FPConversion; - def : FPConversion; - def : FPConversion; - def : FPConversion; - def : FPConversion; + def : FPConversion; + def : FPConversion; + def : FPConversion; + def : FPConversion; + def : FPConversion; + def : FPConversion; } let Predicates = [FeatureVector] in { // Add. - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>; - def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>; - def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>; + def VFADB : BinaryVRRc<"vfadb", 0xE7E3, any_fadd, v128db, v128db, 3, 0>; + def WFADB : BinaryVRRc<"wfadb", 0xE7E3, any_fadd, v64db, v64db, 3, 8>; let Predicates = [FeatureVectorEnhancements1] in { - def VFASB : BinaryVRRc<"vfasb", 0xE7E3, fadd, v128sb, v128sb, 2, 0>; - def WFASB : BinaryVRRc<"wfasb", 0xE7E3, fadd, v32sb, v32sb, 2, 8>; - def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, fadd, v128xb, v128xb, 4, 8>; + def VFASB : BinaryVRRc<"vfasb", 0xE7E3, any_fadd, v128sb, v128sb, 2, 0>; + def WFASB : BinaryVRRc<"wfasb", 0xE7E3, any_fadd, v32sb, v32sb, 2, 8>; + def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, any_fadd, v128xb, v128xb, 4, 8>; } } // Convert from fixed 64-bit. - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>; def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>; def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>; @@ -954,7 +954,7 @@ def : FPConversion; // Convert from logical 64-bit. 
- let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VCDLG : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>; def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>; def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>; @@ -962,7 +962,7 @@ def : FPConversion; // Convert to fixed 64-bit. - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VCGD : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>; def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>; def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>; @@ -971,7 +971,7 @@ def : FPConversion; // Convert to logical 64-bit. - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VCLGD : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>; def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>; def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>; @@ -980,19 +980,19 @@ def : FPConversion; // Divide. - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>; - def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>; - def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>; + def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, any_fdiv, v128db, v128db, 3, 0>; + def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, any_fdiv, v64db, v64db, 3, 8>; let Predicates = [FeatureVectorEnhancements1] in { - def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, fdiv, v128sb, v128sb, 2, 0>; - def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, fdiv, v32sb, v32sb, 2, 8>; - def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, fdiv, v128xb, v128xb, 4, 8>; + def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, any_fdiv, v128sb, v128sb, 2, 0>; + def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, any_fdiv, v32sb, v32sb, 2, 8>; + def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, any_fdiv, v128xb, v128xb, 4, 8>; } } // Load FP integer. - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VFI : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>; def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>; def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>; @@ -1000,7 +1000,7 @@ defm : VectorRounding; defm : VectorRounding; let Predicates = [FeatureVectorEnhancements1] in { - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>; def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>; def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>; @@ -1011,13 +1011,13 @@ } // Load lengthened. - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>; def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>; def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32sb, 2, 8>; } let Predicates = [FeatureVectorEnhancements1] in { - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { let isAsmParserOnly = 1 in { def VFLL : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>; def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>; @@ -1030,7 +1030,7 @@ } // Load rounded. 
- let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VLED : TernaryVRRaFloatGeneric<"vled", 0xE7C5>; def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; @@ -1038,7 +1038,7 @@ def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>; def : FPConversion; let Predicates = [FeatureVectorEnhancements1] in { - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { let isAsmParserOnly = 1 in { def VFLR : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>; def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; @@ -1053,11 +1053,11 @@ // Maximum. multiclass VectorMax { - def : FPMinMax; + def : FPMinMax; def : FPMinMax; } let Predicates = [FeatureVectorEnhancements1] in { - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VFMAX : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>; def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb, v128db, v128db, 3, 0>; @@ -1079,11 +1079,11 @@ // Minimum. multiclass VectorMin { - def : FPMinMax; + def : FPMinMax; def : FPMinMax; } let Predicates = [FeatureVectorEnhancements1] in { - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>; def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb, v128db, v128db, 3, 0>; @@ -1104,59 +1104,61 @@ } // Multiply. - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>; - def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>; - def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>; + def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, any_fmul, v128db, v128db, 3, 0>; + def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, any_fmul, v64db, v64db, 3, 8>; let Predicates = [FeatureVectorEnhancements1] in { - def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, fmul, v128sb, v128sb, 2, 0>; - def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, fmul, v32sb, v32sb, 2, 8>; - def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, fmul, v128xb, v128xb, 4, 8>; + def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, any_fmul, v128sb, v128sb, 2, 0>; + def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, any_fmul, v32sb, v32sb, 2, 8>; + def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, any_fmul, v128xb, v128xb, 4, 8>; } } // Multiply and add. - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>; - def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>; - def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>; + def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, any_fma, v128db, v128db, 0, 3>; + def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, any_fma, v64db, v64db, 8, 3>; let Predicates = [FeatureVectorEnhancements1] in { - def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, fma, v128sb, v128sb, 0, 2>; - def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, fma, v32sb, v32sb, 8, 2>; - def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, fma, v128xb, v128xb, 8, 4>; + def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, any_fma, v128sb, v128sb, 0, 2>; + def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, any_fma, v32sb, v32sb, 8, 2>; + def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, any_fma, v128xb, v128xb, 8, 4>; } } // Multiply and subtract. 
- let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>; - def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>; - def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>; + def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, any_fms, v128db, v128db, 0, 3>; + def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, any_fms, v64db, v64db, 8, 3>; let Predicates = [FeatureVectorEnhancements1] in { - def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, fms, v128sb, v128sb, 0, 2>; - def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, fms, v32sb, v32sb, 8, 2>; - def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, fms, v128xb, v128xb, 8, 4>; + def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, any_fms, v128sb, v128sb, 0, 2>; + def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, any_fms, v32sb, v32sb, 8, 2>; + def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, any_fms, v128xb, v128xb, 8, 4>; } } // Negative multiply and add. - let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], mayRaiseFPException = 1, + Predicates = [FeatureVectorEnhancements1] in { def VFNMA : TernaryVRReFloatGeneric<"vfnma", 0xE79F>; - def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, fnma, v128db, v128db, 0, 3>; - def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, fnma, v64db, v64db, 8, 3>; - def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, fnma, v128sb, v128sb, 0, 2>; - def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, fnma, v32sb, v32sb, 8, 2>; - def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, fnma, v128xb, v128xb, 8, 4>; + def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, any_fnma, v128db, v128db, 0, 3>; + def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, any_fnma, v64db, v64db, 8, 3>; + def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, any_fnma, v128sb, v128sb, 0, 2>; + def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, any_fnma, v32sb, v32sb, 8, 2>; + def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, any_fnma, v128xb, v128xb, 8, 4>; } // Negative multiply and subtract. - let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], mayRaiseFPException = 1, + Predicates = [FeatureVectorEnhancements1] in { def VFNMS : TernaryVRReFloatGeneric<"vfnms", 0xE79E>; - def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, fnms, v128db, v128db, 0, 3>; - def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, fnms, v64db, v64db, 8, 3>; - def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, fnms, v128sb, v128sb, 0, 2>; - def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, fnms, v32sb, v32sb, 8, 2>; - def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, fnms, v128xb, v128xb, 8, 4>; + def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, any_fnms, v128db, v128db, 0, 3>; + def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, any_fnms, v64db, v64db, 8, 3>; + def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, any_fnms, v128sb, v128sb, 0, 2>; + def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, any_fnms, v32sb, v32sb, 8, 2>; + def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, any_fnms, v128xb, v128xb, 8, 4>; } // Perform sign operation. @@ -1197,26 +1199,26 @@ } // Square root. 
- let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>; - def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>; - def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>; + def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, any_fsqrt, v128db, v128db, 3, 0>; + def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, any_fsqrt, v64db, v64db, 3, 8>; let Predicates = [FeatureVectorEnhancements1] in { - def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, fsqrt, v128sb, v128sb, 2, 0>; - def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, fsqrt, v32sb, v32sb, 2, 8>; - def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, fsqrt, v128xb, v128xb, 4, 8>; + def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, any_fsqrt, v128sb, v128sb, 2, 0>; + def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, any_fsqrt, v32sb, v32sb, 2, 8>; + def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, any_fsqrt, v128xb, v128xb, 4, 8>; } } // Subtract. - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>; - def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>; - def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>; + def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, any_fsub, v128db, v128db, 3, 0>; + def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, any_fsub, v64db, v64db, 3, 8>; let Predicates = [FeatureVectorEnhancements1] in { - def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, fsub, v128sb, v128sb, 2, 0>; - def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, fsub, v32sb, v32sb, 2, 8>; - def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, fsub, v128xb, v128xb, 4, 8>; + def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, any_fsub, v128sb, v128sb, 2, 0>; + def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, any_fsub, v32sb, v32sb, 2, 8>; + def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, any_fsub, v128xb, v128xb, 4, 8>; } } @@ -1239,7 +1241,7 @@ let Predicates = [FeatureVector] in { // Compare scalar. - let Uses = [FPC], Defs = [CC] in { + let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>; def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>; let Predicates = [FeatureVectorEnhancements1] in { @@ -1249,7 +1251,7 @@ } // Compare and signal scalar. - let Uses = [FPC], Defs = [CC] in { + let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>; def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>; let Predicates = [FeatureVectorEnhancements1] in { @@ -1259,7 +1261,7 @@ } // Compare equal. - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VFCE : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>; defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes, v128g, v128db, 3, 0>; @@ -1276,7 +1278,8 @@ } // Compare and signal equal. - let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], mayRaiseFPException = 1, + Predicates = [FeatureVectorEnhancements1] in { defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, null_frag, null_frag, v128g, v128db, 3, 4>; defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag, @@ -1290,7 +1293,7 @@ } // Compare high. - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>; defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs, v128g, v128db, 3, 0>; @@ -1307,7 +1310,8 @@ } // Compare and signal high. 
- let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], mayRaiseFPException = 1, + Predicates = [FeatureVectorEnhancements1] in { defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, null_frag, null_frag, v128g, v128db, 3, 4>; defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag, @@ -1321,7 +1325,7 @@ } // Compare high or equal. - let Uses = [FPC] in { + let Uses = [FPC], mayRaiseFPException = 1 in { def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>; defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes, v128g, v128db, 3, 0>; @@ -1338,7 +1342,8 @@ } // Compare and signal high or equal. - let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in { + let Uses = [FPC], mayRaiseFPException = 1, + Predicates = [FeatureVectorEnhancements1] in { defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, null_frag, null_frag, v128g, v128db, 3, 4>; defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag, Index: lib/Target/SystemZ/SystemZOperators.td =================================================================== --- lib/Target/SystemZ/SystemZOperators.td +++ lib/Target/SystemZ/SystemZOperators.td @@ -662,21 +662,21 @@ (sub node:$src1, node:$src2)]>; // Fused multiply-subtract, using the natural operand order. -def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (fma node:$src1, node:$src2, (fneg node:$src3))>; +def any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (any_fma node:$src1, node:$src2, (fneg node:$src3))>; // Fused multiply-add and multiply-subtract, but with the order of the // operands matching SystemZ's MA and MS instructions. -def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (fma node:$src2, node:$src3, node:$src1)>; -def z_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (fma node:$src2, node:$src3, (fneg node:$src1))>; +def z_any_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (any_fma node:$src2, node:$src3, node:$src1)>; +def z_any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (any_fma node:$src2, node:$src3, (fneg node:$src1))>; // Negative fused multiply-add and multiply-subtract. -def fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (fneg (fma node:$src1, node:$src2, node:$src3))>; -def fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (fneg (fms node:$src1, node:$src2, node:$src3))>; +def any_fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (fneg (any_fma node:$src1, node:$src2, node:$src3))>; +def any_fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (fneg (any_fms node:$src1, node:$src2, node:$src3))>; // Floating-point negative absolute. def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>; Index: test/CodeGen/SystemZ/fp-strict-add-01.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-add-01.ll +++ test/CodeGen/SystemZ/fp-strict-add-01.ll @@ -0,0 +1,173 @@ +; Test 32-bit floating-point strict addition. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare float @foo() +declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) + +; Check register addition. 
+define float @f1(float %f1, float %f2) { +; CHECK-LABEL: f1: +; CHECK: aebr %f0, %f2 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.fadd.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check the low end of the AEB range. +define float @f2(float %f1, float *%ptr) { +; CHECK-LABEL: f2: +; CHECK: aeb %f0, 0(%r2) +; CHECK: br %r14 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fadd.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check the high end of the aligned AEB range. +define float @f3(float %f1, float *%base) { +; CHECK-LABEL: f3: +; CHECK: aeb %f0, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1023 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fadd.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define float @f4(float %f1, float *%base) { +; CHECK-LABEL: f4: +; CHECK: aghi %r2, 4096 +; CHECK: aeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1024 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fadd.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check negative displacements, which also need separate address logic. +define float @f5(float %f1, float *%base) { +; CHECK-LABEL: f5: +; CHECK: aghi %r2, -4 +; CHECK: aeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 -1 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fadd.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check that AEB allows indices. +define float @f6(float %f1, float *%base, i64 %index) { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r3, 2 +; CHECK: aeb %f0, 400(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr float, float *%base, i64 %index + %ptr2 = getelementptr float, float *%ptr1, i64 100 + %f2 = load float, float *%ptr2 + %res = call float @llvm.experimental.constrained.fadd.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check that additions of spilled values can use AEB rather than AEBR. 
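+; Enough values are kept live across the call to @foo that some of them have to be
+; spilled; folding the reload into AEB avoids a separate load after the call.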
+define float @f7(float *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK-SCALAR: aeb %f0, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr float, float *%ptr0, i64 2 + %ptr2 = getelementptr float, float *%ptr0, i64 4 + %ptr3 = getelementptr float, float *%ptr0, i64 6 + %ptr4 = getelementptr float, float *%ptr0, i64 8 + %ptr5 = getelementptr float, float *%ptr0, i64 10 + %ptr6 = getelementptr float, float *%ptr0, i64 12 + %ptr7 = getelementptr float, float *%ptr0, i64 14 + %ptr8 = getelementptr float, float *%ptr0, i64 16 + %ptr9 = getelementptr float, float *%ptr0, i64 18 + %ptr10 = getelementptr float, float *%ptr0, i64 20 + + %val0 = load float, float *%ptr0 + %val1 = load float, float *%ptr1 + %val2 = load float, float *%ptr2 + %val3 = load float, float *%ptr3 + %val4 = load float, float *%ptr4 + %val5 = load float, float *%ptr5 + %val6 = load float, float *%ptr6 + %val7 = load float, float *%ptr7 + %val8 = load float, float *%ptr8 + %val9 = load float, float *%ptr9 + %val10 = load float, float *%ptr10 + + %ret = call float @foo() + + %add0 = call float @llvm.experimental.constrained.fadd.f32( + float %ret, float %val0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add1 = call float @llvm.experimental.constrained.fadd.f32( + float %add0, float %val1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add2 = call float @llvm.experimental.constrained.fadd.f32( + float %add1, float %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add3 = call float @llvm.experimental.constrained.fadd.f32( + float %add2, float %val3, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add4 = call float @llvm.experimental.constrained.fadd.f32( + float %add3, float %val4, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add5 = call float @llvm.experimental.constrained.fadd.f32( + float %add4, float %val5, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add6 = call float @llvm.experimental.constrained.fadd.f32( + float %add5, float %val6, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add7 = call float @llvm.experimental.constrained.fadd.f32( + float %add6, float %val7, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add8 = call float @llvm.experimental.constrained.fadd.f32( + float %add7, float %val8, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add9 = call float @llvm.experimental.constrained.fadd.f32( + float %add8, float %val9, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add10 = call float @llvm.experimental.constrained.fadd.f32( + float %add9, float %val10, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + ret float %add10 +} Index: test/CodeGen/SystemZ/fp-strict-add-02.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-add-02.ll +++ test/CodeGen/SystemZ/fp-strict-add-02.ll @@ -0,0 +1,172 @@ +; Test strict 64-bit floating-point addition. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s +declare double @foo() +declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) + +; Check register addition. 
+define double @f1(double %f1, double %f2) { +; CHECK-LABEL: f1: +; CHECK: adbr %f0, %f2 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.fadd.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check the low end of the ADB range. +define double @f2(double %f1, double *%ptr) { +; CHECK-LABEL: f2: +; CHECK: adb %f0, 0(%r2) +; CHECK: br %r14 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fadd.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check the high end of the aligned ADB range. +define double @f3(double %f1, double *%base) { +; CHECK-LABEL: f3: +; CHECK: adb %f0, 4088(%r2) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 511 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fadd.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define double @f4(double %f1, double *%base) { +; CHECK-LABEL: f4: +; CHECK: aghi %r2, 4096 +; CHECK: adb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 512 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fadd.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check negative displacements, which also need separate address logic. +define double @f5(double %f1, double *%base) { +; CHECK-LABEL: f5: +; CHECK: aghi %r2, -8 +; CHECK: adb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 -1 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fadd.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check that ADB allows indices. +define double @f6(double %f1, double *%base, i64 %index) { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r3, 3 +; CHECK: adb %f0, 800(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr double, double *%base, i64 %index + %ptr2 = getelementptr double, double *%ptr1, i64 100 + %f2 = load double, double *%ptr2 + %res = call double @llvm.experimental.constrained.fadd.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check that additions of spilled values can use ADB rather than ADBR. 
+define double @f7(double *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK-SCALAR: adb %f0, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr double, double *%ptr0, i64 2 + %ptr2 = getelementptr double, double *%ptr0, i64 4 + %ptr3 = getelementptr double, double *%ptr0, i64 6 + %ptr4 = getelementptr double, double *%ptr0, i64 8 + %ptr5 = getelementptr double, double *%ptr0, i64 10 + %ptr6 = getelementptr double, double *%ptr0, i64 12 + %ptr7 = getelementptr double, double *%ptr0, i64 14 + %ptr8 = getelementptr double, double *%ptr0, i64 16 + %ptr9 = getelementptr double, double *%ptr0, i64 18 + %ptr10 = getelementptr double, double *%ptr0, i64 20 + + %val0 = load double, double *%ptr0 + %val1 = load double, double *%ptr1 + %val2 = load double, double *%ptr2 + %val3 = load double, double *%ptr3 + %val4 = load double, double *%ptr4 + %val5 = load double, double *%ptr5 + %val6 = load double, double *%ptr6 + %val7 = load double, double *%ptr7 + %val8 = load double, double *%ptr8 + %val9 = load double, double *%ptr9 + %val10 = load double, double *%ptr10 + + %ret = call double @foo() + + %add0 = call double @llvm.experimental.constrained.fadd.f64( + double %ret, double %val0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add1 = call double @llvm.experimental.constrained.fadd.f64( + double %add0, double %val1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add2 = call double @llvm.experimental.constrained.fadd.f64( + double %add1, double %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add3 = call double @llvm.experimental.constrained.fadd.f64( + double %add2, double %val3, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add4 = call double @llvm.experimental.constrained.fadd.f64( + double %add3, double %val4, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add5 = call double @llvm.experimental.constrained.fadd.f64( + double %add4, double %val5, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add6 = call double @llvm.experimental.constrained.fadd.f64( + double %add5, double %val6, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add7 = call double @llvm.experimental.constrained.fadd.f64( + double %add6, double %val7, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add8 = call double @llvm.experimental.constrained.fadd.f64( + double %add7, double %val8, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add9 = call double @llvm.experimental.constrained.fadd.f64( + double %add8, double %val9, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add10 = call double @llvm.experimental.constrained.fadd.f64( + double %add9, double %val10, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + ret double %add10 +} Index: test/CodeGen/SystemZ/fp-strict-add-03.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-add-03.ll +++ test/CodeGen/SystemZ/fp-strict-add-03.ll @@ -0,0 +1,25 @@ +; Test strict 128-bit floating-point addition. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare fp128 @llvm.experimental.constrained.fadd.f128(fp128, fp128, metadata, metadata) + +; There is no memory form of 128-bit addition. 
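+; The fp128 operand occupies a floating-point register pair, so it is loaded with two
+; LD instructions and AXBR then operates entirely on registers.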
+define void @f1(fp128 *%ptr, float %f2) { +; CHECK-LABEL: f1: +; CHECK-DAG: lxebr %f0, %f0 +; CHECK-DAG: ld %f1, 0(%r2) +; CHECK-DAG: ld %f3, 8(%r2) +; CHECK: axbr %f0, %f1 +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr + %f2x = fpext float %f2 to fp128 + %sum = call fp128 @llvm.experimental.constrained.fadd.f128( + fp128 %f1, fp128 %f2x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %sum, fp128 *%ptr + ret void +} Index: test/CodeGen/SystemZ/fp-strict-add-04.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-add-04.ll +++ test/CodeGen/SystemZ/fp-strict-add-04.ll @@ -0,0 +1,22 @@ +; Test strict 128-bit floating-point addition on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare fp128 @llvm.experimental.constrained.fadd.f128(fp128, fp128, metadata, metadata) + +define void @f1(fp128 *%ptr1, fp128 *%ptr2) { +; CHECK-LABEL: f1: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK: wfaxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr1 + %f2 = load fp128, fp128 *%ptr2 + %sum = call fp128 @llvm.experimental.constrained.fadd.f128( + fp128 %f1, fp128 %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %sum, fp128 *%ptr1 + ret void +} Index: test/CodeGen/SystemZ/fp-strict-alias.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-alias.ll +++ test/CodeGen/SystemZ/fp-strict-alias.ll @@ -0,0 +1,140 @@ +; Verify that strict FP operations are not rescheduled +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) +declare float @llvm.sqrt.f32(float) +declare void @llvm.s390.sfpc(i32) + +; For non-strict operations, we expect the post-RA scheduler to +; separate the two square root instructions on z13. +define void @f1(float %f1, float %f2, float %f3, float %f4, float *%ptr0) { +; CHECK-LABEL: f1: +; CHECK: sqebr +; CHECK: {{aebr|sebr}} +; CHECK: sqebr +; CHECK: br %r14 + + %add = fadd float %f1, %f2 + %sub = fsub float %f3, %f4 + %sqrt1 = call float @llvm.sqrt.f32(float %f2) + %sqrt2 = call float @llvm.sqrt.f32(float %f4) + + %ptr1 = getelementptr float, float *%ptr0, i64 1 + %ptr2 = getelementptr float, float *%ptr0, i64 2 + %ptr3 = getelementptr float, float *%ptr0, i64 3 + + store float %add, float *%ptr0 + store float %sub, float *%ptr1 + store float %sqrt1, float *%ptr2 + store float %sqrt2, float *%ptr3 + + ret void +} + +; But for strict operations, this must not happen. 
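+; Each instruction selected from a constrained intrinsic is marked as possibly raising
+; a floating-point exception, which should keep the scheduler from reordering it
+; relative to the other strict operations.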
+define void @f2(float %f1, float %f2, float %f3, float %f4, float *%ptr0) { +; CHECK-LABEL: f2: +; CHECK: {{aebr|sebr}} +; CHECK: {{aebr|sebr}} +; CHECK: sqebr +; CHECK: sqebr +; CHECK: br %r14 + + %add = call float @llvm.experimental.constrained.fadd.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub = call float @llvm.experimental.constrained.fsub.f32( + float %f3, float %f4, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32( + float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32( + float %f4, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + %ptr1 = getelementptr float, float *%ptr0, i64 1 + %ptr2 = getelementptr float, float *%ptr0, i64 2 + %ptr3 = getelementptr float, float *%ptr0, i64 3 + + store float %add, float *%ptr0 + store float %sub, float *%ptr1 + store float %sqrt1, float *%ptr2 + store float %sqrt2, float *%ptr3 + + ret void +} + +; On the other hand, strict operations that use the fpexcept.ignore +; exception behaviour should be scheduled freely. +define void @f3(float %f1, float %f2, float %f3, float %f4, float *%ptr0) { +; CHECK-LABEL: f3: +; CHECK: sqebr +; CHECK: {{aebr|sebr}} +; CHECK: sqebr +; CHECK: br %r14 + + %add = call float @llvm.experimental.constrained.fadd.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.ignore") + %sub = call float @llvm.experimental.constrained.fsub.f32( + float %f3, float %f4, + metadata !"round.dynamic", + metadata !"fpexcept.ignore") + %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32( + float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.ignore") + %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32( + float %f4, + metadata !"round.dynamic", + metadata !"fpexcept.ignore") + + %ptr1 = getelementptr float, float *%ptr0, i64 1 + %ptr2 = getelementptr float, float *%ptr0, i64 2 + %ptr3 = getelementptr float, float *%ptr0, i64 3 + + store float %add, float *%ptr0 + store float %sub, float *%ptr1 + store float %sqrt1, float *%ptr2 + store float %sqrt2, float *%ptr3 + + ret void +} + +; However, even non-strict operations must not be scheduled across an SFPC. +define void @f4(float %f1, float %f2, float %f3, float %f4, float *%ptr0) { +; CHECK-LABEL: f4: +; CHECK: {{aebr|sebr}} +; CHECK: {{aebr|sebr}} +; CHECK: sfpc +; CHECK: sqebr +; CHECK: sqebr +; CHECK: br %r14 + + %add = fadd float %f1, %f2 + %sub = fsub float %f3, %f4 + call void @llvm.s390.sfpc(i32 0) + %sqrt1 = call float @llvm.sqrt.f32(float %f2) + %sqrt2 = call float @llvm.sqrt.f32(float %f4) + + %ptr1 = getelementptr float, float *%ptr0, i64 1 + %ptr2 = getelementptr float, float *%ptr0, i64 2 + %ptr3 = getelementptr float, float *%ptr0, i64 3 + + store float %add, float *%ptr0 + store float %sub, float *%ptr1 + store float %sqrt1, float *%ptr2 + store float %sqrt2, float *%ptr3 + + ret void +} + Index: test/CodeGen/SystemZ/fp-strict-div-01.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-div-01.ll +++ test/CodeGen/SystemZ/fp-strict-div-01.ll @@ -0,0 +1,173 @@ +; Test strict 32-bit floating-point division. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare float @foo() +declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) + +; Check register division. +define float @f1(float %f1, float %f2) { +; CHECK-LABEL: f1: +; CHECK: debr %f0, %f2 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.fdiv.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check the low end of the DEB range. +define float @f2(float %f1, float *%ptr) { +; CHECK-LABEL: f2: +; CHECK: deb %f0, 0(%r2) +; CHECK: br %r14 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fdiv.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check the high end of the aligned DEB range. +define float @f3(float %f1, float *%base) { +; CHECK-LABEL: f3: +; CHECK: deb %f0, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1023 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fdiv.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define float @f4(float %f1, float *%base) { +; CHECK-LABEL: f4: +; CHECK: aghi %r2, 4096 +; CHECK: deb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1024 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fdiv.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check negative displacements, which also need separate address logic. +define float @f5(float %f1, float *%base) { +; CHECK-LABEL: f5: +; CHECK: aghi %r2, -4 +; CHECK: deb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 -1 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fdiv.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check that DEB allows indices. +define float @f6(float %f1, float *%base, i64 %index) { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r3, 2 +; CHECK: deb %f0, 400(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr float, float *%base, i64 %index + %ptr2 = getelementptr float, float *%ptr1, i64 100 + %f2 = load float, float *%ptr2 + %res = call float @llvm.experimental.constrained.fdiv.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check that divisions of spilled values can use DEB rather than DEBR. 
+define float @f7(float *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK-SCALAR: deb %f0, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr float, float *%ptr0, i64 2 + %ptr2 = getelementptr float, float *%ptr0, i64 4 + %ptr3 = getelementptr float, float *%ptr0, i64 6 + %ptr4 = getelementptr float, float *%ptr0, i64 8 + %ptr5 = getelementptr float, float *%ptr0, i64 10 + %ptr6 = getelementptr float, float *%ptr0, i64 12 + %ptr7 = getelementptr float, float *%ptr0, i64 14 + %ptr8 = getelementptr float, float *%ptr0, i64 16 + %ptr9 = getelementptr float, float *%ptr0, i64 18 + %ptr10 = getelementptr float, float *%ptr0, i64 20 + + %val0 = load float, float *%ptr0 + %val1 = load float, float *%ptr1 + %val2 = load float, float *%ptr2 + %val3 = load float, float *%ptr3 + %val4 = load float, float *%ptr4 + %val5 = load float, float *%ptr5 + %val6 = load float, float *%ptr6 + %val7 = load float, float *%ptr7 + %val8 = load float, float *%ptr8 + %val9 = load float, float *%ptr9 + %val10 = load float, float *%ptr10 + + %ret = call float @foo() + + %div0 = call float @llvm.experimental.constrained.fdiv.f32( + float %ret, float %val0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div1 = call float @llvm.experimental.constrained.fdiv.f32( + float %div0, float %val1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div2 = call float @llvm.experimental.constrained.fdiv.f32( + float %div1, float %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div3 = call float @llvm.experimental.constrained.fdiv.f32( + float %div2, float %val3, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div4 = call float @llvm.experimental.constrained.fdiv.f32( + float %div3, float %val4, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div5 = call float @llvm.experimental.constrained.fdiv.f32( + float %div4, float %val5, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div6 = call float @llvm.experimental.constrained.fdiv.f32( + float %div5, float %val6, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div7 = call float @llvm.experimental.constrained.fdiv.f32( + float %div6, float %val7, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div8 = call float @llvm.experimental.constrained.fdiv.f32( + float %div7, float %val8, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div9 = call float @llvm.experimental.constrained.fdiv.f32( + float %div8, float %val9, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div10 = call float @llvm.experimental.constrained.fdiv.f32( + float %div9, float %val10, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + ret float %div10 +} Index: test/CodeGen/SystemZ/fp-strict-div-02.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-div-02.ll +++ test/CodeGen/SystemZ/fp-strict-div-02.ll @@ -0,0 +1,173 @@ +; Test strict 64-bit floating-point division. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare double @foo() +declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata) + +; Check register division. 
+define double @f1(double %f1, double %f2) { +; CHECK-LABEL: f1: +; CHECK: ddbr %f0, %f2 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.fdiv.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check the low end of the DDB range. +define double @f2(double %f1, double *%ptr) { +; CHECK-LABEL: f2: +; CHECK: ddb %f0, 0(%r2) +; CHECK: br %r14 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fdiv.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check the high end of the aligned DDB range. +define double @f3(double %f1, double *%base) { +; CHECK-LABEL: f3: +; CHECK: ddb %f0, 4088(%r2) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 511 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fdiv.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define double @f4(double %f1, double *%base) { +; CHECK-LABEL: f4: +; CHECK: aghi %r2, 4096 +; CHECK: ddb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 512 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fdiv.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check negative displacements, which also need separate address logic. +define double @f5(double %f1, double *%base) { +; CHECK-LABEL: f5: +; CHECK: aghi %r2, -8 +; CHECK: ddb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 -1 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fdiv.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check that DDB allows indices. +define double @f6(double %f1, double *%base, i64 %index) { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r3, 3 +; CHECK: ddb %f0, 800(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr double, double *%base, i64 %index + %ptr2 = getelementptr double, double *%ptr1, i64 100 + %f2 = load double, double *%ptr2 + %res = call double @llvm.experimental.constrained.fdiv.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check that divisions of spilled values can use DDB rather than DDBR. 
+define double @f7(double *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK-SCALAR: ddb %f0, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr double, double *%ptr0, i64 2 + %ptr2 = getelementptr double, double *%ptr0, i64 4 + %ptr3 = getelementptr double, double *%ptr0, i64 6 + %ptr4 = getelementptr double, double *%ptr0, i64 8 + %ptr5 = getelementptr double, double *%ptr0, i64 10 + %ptr6 = getelementptr double, double *%ptr0, i64 12 + %ptr7 = getelementptr double, double *%ptr0, i64 14 + %ptr8 = getelementptr double, double *%ptr0, i64 16 + %ptr9 = getelementptr double, double *%ptr0, i64 18 + %ptr10 = getelementptr double, double *%ptr0, i64 20 + + %val0 = load double, double *%ptr0 + %val1 = load double, double *%ptr1 + %val2 = load double, double *%ptr2 + %val3 = load double, double *%ptr3 + %val4 = load double, double *%ptr4 + %val5 = load double, double *%ptr5 + %val6 = load double, double *%ptr6 + %val7 = load double, double *%ptr7 + %val8 = load double, double *%ptr8 + %val9 = load double, double *%ptr9 + %val10 = load double, double *%ptr10 + + %ret = call double @foo() + + %div0 = call double @llvm.experimental.constrained.fdiv.f64( + double %ret, double %val0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div1 = call double @llvm.experimental.constrained.fdiv.f64( + double %div0, double %val1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div2 = call double @llvm.experimental.constrained.fdiv.f64( + double %div1, double %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div3 = call double @llvm.experimental.constrained.fdiv.f64( + double %div2, double %val3, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div4 = call double @llvm.experimental.constrained.fdiv.f64( + double %div3, double %val4, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div5 = call double @llvm.experimental.constrained.fdiv.f64( + double %div4, double %val5, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div6 = call double @llvm.experimental.constrained.fdiv.f64( + double %div5, double %val6, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div7 = call double @llvm.experimental.constrained.fdiv.f64( + double %div6, double %val7, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div8 = call double @llvm.experimental.constrained.fdiv.f64( + double %div7, double %val8, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div9 = call double @llvm.experimental.constrained.fdiv.f64( + double %div8, double %val9, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %div10 = call double @llvm.experimental.constrained.fdiv.f64( + double %div9, double %val10, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + ret double %div10 +} Index: test/CodeGen/SystemZ/fp-strict-div-03.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-div-03.ll +++ test/CodeGen/SystemZ/fp-strict-div-03.ll @@ -0,0 +1,25 @@ +; Test strict 128-bit floating-point division. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare fp128 @llvm.experimental.constrained.fdiv.f128(fp128, fp128, metadata, metadata) + +; There is no memory form of 128-bit division. 
+define void @f1(fp128 *%ptr, float %f2) { +; CHECK-LABEL: f1: +; CHECK-DAG: lxebr %f0, %f0 +; CHECK-DAG: ld %f1, 0(%r2) +; CHECK-DAG: ld %f3, 8(%r2) +; CHECK: dxbr %f1, %f0 +; CHECK: std %f1, 0(%r2) +; CHECK: std %f3, 8(%r2) +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr + %f2x = fpext float %f2 to fp128 + %sum = call fp128 @llvm.experimental.constrained.fdiv.f128( + fp128 %f1, fp128 %f2x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %sum, fp128 *%ptr + ret void +} Index: test/CodeGen/SystemZ/fp-strict-div-04.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-div-04.ll +++ test/CodeGen/SystemZ/fp-strict-div-04.ll @@ -0,0 +1,22 @@ +; Test strict 128-bit floating-point division on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare fp128 @llvm.experimental.constrained.fdiv.f128(fp128, fp128, metadata, metadata) + +define void @f1(fp128 *%ptr1, fp128 *%ptr2) { +; CHECK-LABEL: f1: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK: wfdxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr1 + %f2 = load fp128, fp128 *%ptr2 + %sum = call fp128 @llvm.experimental.constrained.fdiv.f128( + fp128 %f1, fp128 %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %sum, fp128 *%ptr1 + ret void +} Index: test/CodeGen/SystemZ/fp-strict-mul-01.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-mul-01.ll +++ test/CodeGen/SystemZ/fp-strict-mul-01.ll @@ -0,0 +1,173 @@ +; Test strict multiplication of two f32s, producing an f32 result. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare float @foo() +declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) + +; Check register multiplication. +define float @f1(float %f1, float %f2) { +; CHECK-LABEL: f1: +; CHECK: meebr %f0, %f2 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.fmul.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check the low end of the MEEB range. +define float @f2(float %f1, float *%ptr) { +; CHECK-LABEL: f2: +; CHECK: meeb %f0, 0(%r2) +; CHECK: br %r14 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fmul.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check the high end of the aligned MEEB range. +define float @f3(float %f1, float *%base) { +; CHECK-LABEL: f3: +; CHECK: meeb %f0, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1023 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fmul.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. 
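+; The 12-bit unsigned displacement of MEEB only reaches 4095, so byte offset 4096
+; requires an explicit address adjustment first.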
+define float @f4(float %f1, float *%base) { +; CHECK-LABEL: f4: +; CHECK: aghi %r2, 4096 +; CHECK: meeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1024 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fmul.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check negative displacements, which also need separate address logic. +define float @f5(float %f1, float *%base) { +; CHECK-LABEL: f5: +; CHECK: aghi %r2, -4 +; CHECK: meeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 -1 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fmul.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check that MEEB allows indices. +define float @f6(float %f1, float *%base, i64 %index) { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r3, 2 +; CHECK: meeb %f0, 400(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr float, float *%base, i64 %index + %ptr2 = getelementptr float, float *%ptr1, i64 100 + %f2 = load float, float *%ptr2 + %res = call float @llvm.experimental.constrained.fmul.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check that multiplications of spilled values can use MEEB rather than MEEBR. +define float @f7(float *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK-SCALAR: meeb %f0, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr float, float *%ptr0, i64 2 + %ptr2 = getelementptr float, float *%ptr0, i64 4 + %ptr3 = getelementptr float, float *%ptr0, i64 6 + %ptr4 = getelementptr float, float *%ptr0, i64 8 + %ptr5 = getelementptr float, float *%ptr0, i64 10 + %ptr6 = getelementptr float, float *%ptr0, i64 12 + %ptr7 = getelementptr float, float *%ptr0, i64 14 + %ptr8 = getelementptr float, float *%ptr0, i64 16 + %ptr9 = getelementptr float, float *%ptr0, i64 18 + %ptr10 = getelementptr float, float *%ptr0, i64 20 + + %val0 = load float, float *%ptr0 + %val1 = load float, float *%ptr1 + %val2 = load float, float *%ptr2 + %val3 = load float, float *%ptr3 + %val4 = load float, float *%ptr4 + %val5 = load float, float *%ptr5 + %val6 = load float, float *%ptr6 + %val7 = load float, float *%ptr7 + %val8 = load float, float *%ptr8 + %val9 = load float, float *%ptr9 + %val10 = load float, float *%ptr10 + + %ret = call float @foo() + + %mul0 = call float @llvm.experimental.constrained.fmul.f32( + float %ret, float %val0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %mul1 = call float @llvm.experimental.constrained.fmul.f32( + float %mul0, float %val1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %mul2 = call float @llvm.experimental.constrained.fmul.f32( + float %mul1, float %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %mul3 = call float @llvm.experimental.constrained.fmul.f32( + float %mul2, float %val3, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %mul4 = call float @llvm.experimental.constrained.fmul.f32( + float %mul3, float %val4, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %mul5 = call float @llvm.experimental.constrained.fmul.f32( + float %mul4, float %val5, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %mul6 = call float @llvm.experimental.constrained.fmul.f32( + float %mul5, float %val6, + metadata !"round.dynamic", + metadata !"fpexcept.strict") 
+ %mul7 = call float @llvm.experimental.constrained.fmul.f32( + float %mul6, float %val7, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %mul8 = call float @llvm.experimental.constrained.fmul.f32( + float %mul7, float %val8, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %mul9 = call float @llvm.experimental.constrained.fmul.f32( + float %mul8, float %val9, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %mul10 = call float @llvm.experimental.constrained.fmul.f32( + float %mul9, float %val10, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + ret float %mul10 +} Index: test/CodeGen/SystemZ/fp-strict-mul-02.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-mul-02.ll +++ test/CodeGen/SystemZ/fp-strict-mul-02.ll @@ -0,0 +1,283 @@ +; Test strict multiplication of two f32s, producing an f64 result. +; FIXME: we do not have a strict version of fpext yet +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare float @foo() +declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata) + +; Check register multiplication. +define double @f1(float %f1, float %f2) { +; CHECK-LABEL: f1: +; CHECK: mdebr %f0, %f2 +; CHECK: br %r14 + %f1x = fpext float %f1 to double + %f2x = fpext float %f2 to double + %res = call double @llvm.experimental.constrained.fmul.f64( + double %f1x, double %f2x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check the low end of the MDEB range. +define double @f2(float %f1, float *%ptr) { +; CHECK-LABEL: f2: +; CHECK: mdeb %f0, 0(%r2) +; CHECK: br %r14 + %f2 = load float, float *%ptr + %f1x = fpext float %f1 to double + %f2x = fpext float %f2 to double + %res = call double @llvm.experimental.constrained.fmul.f64( + double %f1x, double %f2x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check the high end of the aligned MDEB range. +define double @f3(float %f1, float *%base) { +; CHECK-LABEL: f3: +; CHECK: mdeb %f0, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1023 + %f2 = load float, float *%ptr + %f1x = fpext float %f1 to double + %f2x = fpext float %f2 to double + %res = call double @llvm.experimental.constrained.fmul.f64( + double %f1x, double %f2x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define double @f4(float %f1, float *%base) { +; CHECK-LABEL: f4: +; CHECK: aghi %r2, 4096 +; CHECK: mdeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1024 + %f2 = load float, float *%ptr + %f1x = fpext float %f1 to double + %f2x = fpext float %f2 to double + %res = call double @llvm.experimental.constrained.fmul.f64( + double %f1x, double %f2x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check negative displacements, which also need separate address logic. 
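+; The displacement field is unsigned, so a negative offset cannot be encoded directly
+; and the base register is adjusted instead.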
+define double @f5(float %f1, float *%base) { +; CHECK-LABEL: f5: +; CHECK: aghi %r2, -4 +; CHECK: mdeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 -1 + %f2 = load float, float *%ptr + %f1x = fpext float %f1 to double + %f2x = fpext float %f2 to double + %res = call double @llvm.experimental.constrained.fmul.f64( + double %f1x, double %f2x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check that MDEB allows indices. +define double @f6(float %f1, float *%base, i64 %index) { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r3, 2 +; CHECK: mdeb %f0, 400(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr float, float *%base, i64 %index + %ptr2 = getelementptr float, float *%ptr1, i64 100 + %f2 = load float, float *%ptr2 + %f1x = fpext float %f1 to double + %f2x = fpext float %f2 to double + %res = call double @llvm.experimental.constrained.fmul.f64( + double %f1x, double %f2x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check that multiplications of spilled values can use MDEB rather than MDEBR. +define float @f7(float *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: mdeb %f0, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr float, float *%ptr0, i64 2 + %ptr2 = getelementptr float, float *%ptr0, i64 4 + %ptr3 = getelementptr float, float *%ptr0, i64 6 + %ptr4 = getelementptr float, float *%ptr0, i64 8 + %ptr5 = getelementptr float, float *%ptr0, i64 10 + %ptr6 = getelementptr float, float *%ptr0, i64 12 + %ptr7 = getelementptr float, float *%ptr0, i64 14 + %ptr8 = getelementptr float, float *%ptr0, i64 16 + %ptr9 = getelementptr float, float *%ptr0, i64 18 + %ptr10 = getelementptr float, float *%ptr0, i64 20 + + %val0 = load float, float *%ptr0 + %val1 = load float, float *%ptr1 + %val2 = load float, float *%ptr2 + %val3 = load float, float *%ptr3 + %val4 = load float, float *%ptr4 + %val5 = load float, float *%ptr5 + %val6 = load float, float *%ptr6 + %val7 = load float, float *%ptr7 + %val8 = load float, float *%ptr8 + %val9 = load float, float *%ptr9 + %val10 = load float, float *%ptr10 + + %frob0 = fadd float %val0, %val0 + %frob1 = fadd float %val1, %val1 + %frob2 = fadd float %val2, %val2 + %frob3 = fadd float %val3, %val3 + %frob4 = fadd float %val4, %val4 + %frob5 = fadd float %val5, %val5 + %frob6 = fadd float %val6, %val6 + %frob7 = fadd float %val7, %val7 + %frob8 = fadd float %val8, %val8 + %frob9 = fadd float %val9, %val9 + %frob10 = fadd float %val9, %val10 + + store float %frob0, float *%ptr0 + store float %frob1, float *%ptr1 + store float %frob2, float *%ptr2 + store float %frob3, float *%ptr3 + store float %frob4, float *%ptr4 + store float %frob5, float *%ptr5 + store float %frob6, float *%ptr6 + store float %frob7, float *%ptr7 + store float %frob8, float *%ptr8 + store float %frob9, float *%ptr9 + store float %frob10, float *%ptr10 + + %ret = call float @foo() + + %accext0 = fpext float %ret to double + %ext0 = fpext float %frob0 to double + %mul0 = call double @llvm.experimental.constrained.fmul.f64( + double %accext0, double %ext0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %extra0 = call double @llvm.experimental.constrained.fmul.f64( + double %mul0, double 1.01, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc0 = fptrunc double %extra0 to float + + %accext1 = fpext float %trunc0 to double + %ext1 = fpext float %frob1 to double + %mul1 = call double @llvm.experimental.constrained.fmul.f64( + 
double %accext1, double %ext1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %extra1 = call double @llvm.experimental.constrained.fmul.f64( + double %mul1, double 1.11, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc1 = fptrunc double %extra1 to float + + %accext2 = fpext float %trunc1 to double + %ext2 = fpext float %frob2 to double + %mul2 = call double @llvm.experimental.constrained.fmul.f64( + double %accext2, double %ext2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %extra2 = call double @llvm.experimental.constrained.fmul.f64( + double %mul2, double 1.21, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc2 = fptrunc double %extra2 to float + + %accext3 = fpext float %trunc2 to double + %ext3 = fpext float %frob3 to double + %mul3 = call double @llvm.experimental.constrained.fmul.f64( + double %accext3, double %ext3, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %extra3 = call double @llvm.experimental.constrained.fmul.f64( + double %mul3, double 1.31, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc3 = fptrunc double %extra3 to float + + %accext4 = fpext float %trunc3 to double + %ext4 = fpext float %frob4 to double + %mul4 = call double @llvm.experimental.constrained.fmul.f64( + double %accext4, double %ext4, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %extra4 = call double @llvm.experimental.constrained.fmul.f64( + double %mul4, double 1.41, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc4 = fptrunc double %extra4 to float + + %accext5 = fpext float %trunc4 to double + %ext5 = fpext float %frob5 to double + %mul5 = call double @llvm.experimental.constrained.fmul.f64( + double %accext5, double %ext5, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %extra5 = call double @llvm.experimental.constrained.fmul.f64( + double %mul5, double 1.51, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc5 = fptrunc double %extra5 to float + + %accext6 = fpext float %trunc5 to double + %ext6 = fpext float %frob6 to double + %mul6 = call double @llvm.experimental.constrained.fmul.f64( + double %accext6, double %ext6, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %extra6 = call double @llvm.experimental.constrained.fmul.f64( + double %mul6, double 1.61, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc6 = fptrunc double %extra6 to float + + %accext7 = fpext float %trunc6 to double + %ext7 = fpext float %frob7 to double + %mul7 = call double @llvm.experimental.constrained.fmul.f64( + double %accext7, double %ext7, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %extra7 = call double @llvm.experimental.constrained.fmul.f64( + double %mul7, double 1.71, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc7 = fptrunc double %extra7 to float + + %accext8 = fpext float %trunc7 to double + %ext8 = fpext float %frob8 to double + %mul8 = call double @llvm.experimental.constrained.fmul.f64( + double %accext8, double %ext8, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %extra8 = call double @llvm.experimental.constrained.fmul.f64( + double %mul8, double 1.81, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc8 = fptrunc double %extra8 to float + + %accext9 = fpext float %trunc8 to double + %ext9 = fpext float %frob9 to double + %mul9 = call double @llvm.experimental.constrained.fmul.f64( + double %accext9, double %ext9, + metadata 
!"round.dynamic", + metadata !"fpexcept.strict") + %extra9 = call double @llvm.experimental.constrained.fmul.f64( + double %mul9, double 1.91, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc9 = fptrunc double %extra9 to float + + ret float %trunc9 +} Index: test/CodeGen/SystemZ/fp-strict-mul-03.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-mul-03.ll +++ test/CodeGen/SystemZ/fp-strict-mul-03.ll @@ -0,0 +1,173 @@ +; Test strict multiplication of two f64s, producing an f64 result. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare double @foo() +declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata) + +; Check register multiplication. +define double @f1(double %f1, double %f2) { +; CHECK-LABEL: f1: +; CHECK: mdbr %f0, %f2 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.fmul.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check the low end of the MDB range. +define double @f2(double %f1, double *%ptr) { +; CHECK-LABEL: f2: +; CHECK: mdb %f0, 0(%r2) +; CHECK: br %r14 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fmul.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check the high end of the aligned MDB range. +define double @f3(double %f1, double *%base) { +; CHECK-LABEL: f3: +; CHECK: mdb %f0, 4088(%r2) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 511 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fmul.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define double @f4(double %f1, double *%base) { +; CHECK-LABEL: f4: +; CHECK: aghi %r2, 4096 +; CHECK: mdb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 512 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fmul.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check negative displacements, which also need separate address logic. +define double @f5(double %f1, double *%base) { +; CHECK-LABEL: f5: +; CHECK: aghi %r2, -8 +; CHECK: mdb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 -1 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fmul.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check that MDB allows indices. +define double @f6(double %f1, double *%base, i64 %index) { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r3, 3 +; CHECK: mdb %f0, 800(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr double, double *%base, i64 %index + %ptr2 = getelementptr double, double *%ptr1, i64 100 + %f2 = load double, double *%ptr2 + %res = call double @llvm.experimental.constrained.fmul.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check that multiplications of spilled values can use MDB rather than MDBR. 
+define double @f7(double *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK-SCALAR: mdb %f0, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr double, double *%ptr0, i64 2 + %ptr2 = getelementptr double, double *%ptr0, i64 4 + %ptr3 = getelementptr double, double *%ptr0, i64 6 + %ptr4 = getelementptr double, double *%ptr0, i64 8 + %ptr5 = getelementptr double, double *%ptr0, i64 10 + %ptr6 = getelementptr double, double *%ptr0, i64 12 + %ptr7 = getelementptr double, double *%ptr0, i64 14 + %ptr8 = getelementptr double, double *%ptr0, i64 16 + %ptr9 = getelementptr double, double *%ptr0, i64 18 + %ptr10 = getelementptr double, double *%ptr0, i64 20 + + %val0 = load double, double *%ptr0 + %val1 = load double, double *%ptr1 + %val2 = load double, double *%ptr2 + %val3 = load double, double *%ptr3 + %val4 = load double, double *%ptr4 + %val5 = load double, double *%ptr5 + %val6 = load double, double *%ptr6 + %val7 = load double, double *%ptr7 + %val8 = load double, double *%ptr8 + %val9 = load double, double *%ptr9 + %val10 = load double, double *%ptr10 + + %ret = call double @foo() + + %mul0 = call double @llvm.experimental.constrained.fmul.f64( + double %ret, double %val0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %mul1 = call double @llvm.experimental.constrained.fmul.f64( + double %mul0, double %val1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %mul2 = call double @llvm.experimental.constrained.fmul.f64( + double %mul1, double %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %mul3 = call double @llvm.experimental.constrained.fmul.f64( + double %mul2, double %val3, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %mul4 = call double @llvm.experimental.constrained.fmul.f64( + double %mul3, double %val4, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %mul5 = call double @llvm.experimental.constrained.fmul.f64( + double %mul4, double %val5, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %mul6 = call double @llvm.experimental.constrained.fmul.f64( + double %mul5, double %val6, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %mul7 = call double @llvm.experimental.constrained.fmul.f64( + double %mul6, double %val7, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %mul8 = call double @llvm.experimental.constrained.fmul.f64( + double %mul7, double %val8, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %mul9 = call double @llvm.experimental.constrained.fmul.f64( + double %mul8, double %val9, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %mul10 = call double @llvm.experimental.constrained.fmul.f64( + double %mul9, double %val10, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + ret double %mul10 +} Index: test/CodeGen/SystemZ/fp-strict-mul-04.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-mul-04.ll +++ test/CodeGen/SystemZ/fp-strict-mul-04.ll @@ -0,0 +1,314 @@ +; Test strict multiplication of two f64s, producing an f128 result. +; FIXME: we do not have a strict version of fpext yet +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare fp128 @llvm.experimental.constrained.fmul.f128(fp128, fp128, metadata, metadata) + +declare double @foo() + +; Check register multiplication. "mxdbr %f0, %f2" is not valid from LLVM's +; point of view, because %f2 is the low register of the FP128 %f0. Pass the +; multiplier in %f4 instead. 
+define void @f1(double %f1, double %dummy, double %f2, fp128 *%dst) { +; CHECK-LABEL: f1: +; CHECK: mxdbr %f0, %f4 +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %f1x = fpext double %f1 to fp128 + %f2x = fpext double %f2 to fp128 + %res = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %f1x, fp128 %f2x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%dst + ret void +} + +; Check the low end of the MXDB range. +define void @f2(double %f1, double *%ptr, fp128 *%dst) { +; CHECK-LABEL: f2: +; CHECK: mxdb %f0, 0(%r2) +; CHECK: std %f0, 0(%r3) +; CHECK: std %f2, 8(%r3) +; CHECK: br %r14 + %f2 = load double, double *%ptr + %f1x = fpext double %f1 to fp128 + %f2x = fpext double %f2 to fp128 + %res = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %f1x, fp128 %f2x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%dst + ret void +} + +; Check the high end of the aligned MXDB range. +define void @f3(double %f1, double *%base, fp128 *%dst) { +; CHECK-LABEL: f3: +; CHECK: mxdb %f0, 4088(%r2) +; CHECK: std %f0, 0(%r3) +; CHECK: std %f2, 8(%r3) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 511 + %f2 = load double, double *%ptr + %f1x = fpext double %f1 to fp128 + %f2x = fpext double %f2 to fp128 + %res = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %f1x, fp128 %f2x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%dst + ret void +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f4(double %f1, double *%base, fp128 *%dst) { +; CHECK-LABEL: f4: +; CHECK: aghi %r2, 4096 +; CHECK: mxdb %f0, 0(%r2) +; CHECK: std %f0, 0(%r3) +; CHECK: std %f2, 8(%r3) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 512 + %f2 = load double, double *%ptr + %f1x = fpext double %f1 to fp128 + %f2x = fpext double %f2 to fp128 + %res = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %f1x, fp128 %f2x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%dst + ret void +} + +; Check negative displacements, which also need separate address logic. +define void @f5(double %f1, double *%base, fp128 *%dst) { +; CHECK-LABEL: f5: +; CHECK: aghi %r2, -8 +; CHECK: mxdb %f0, 0(%r2) +; CHECK: std %f0, 0(%r3) +; CHECK: std %f2, 8(%r3) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 -1 + %f2 = load double, double *%ptr + %f1x = fpext double %f1 to fp128 + %f2x = fpext double %f2 to fp128 + %res = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %f1x, fp128 %f2x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%dst + ret void +} + +; Check that MXDB allows indices. 
+define void @f6(double %f1, double *%base, i64 %index, fp128 *%dst) { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r3, 3 +; CHECK: mxdb %f0, 800(%r1,%r2) +; CHECK: std %f0, 0(%r4) +; CHECK: std %f2, 8(%r4) +; CHECK: br %r14 + %ptr1 = getelementptr double, double *%base, i64 %index + %ptr2 = getelementptr double, double *%ptr1, i64 100 + %f2 = load double, double *%ptr2 + %f1x = fpext double %f1 to fp128 + %f2x = fpext double %f2 to fp128 + %res = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %f1x, fp128 %f2x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%dst + ret void +} + +; Check that multiplications of spilled values can use MXDB rather than MXDBR. +define double @f7(double *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: mxdb %f0, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr double, double *%ptr0, i64 2 + %ptr2 = getelementptr double, double *%ptr0, i64 4 + %ptr3 = getelementptr double, double *%ptr0, i64 6 + %ptr4 = getelementptr double, double *%ptr0, i64 8 + %ptr5 = getelementptr double, double *%ptr0, i64 10 + %ptr6 = getelementptr double, double *%ptr0, i64 12 + %ptr7 = getelementptr double, double *%ptr0, i64 14 + %ptr8 = getelementptr double, double *%ptr0, i64 16 + %ptr9 = getelementptr double, double *%ptr0, i64 18 + %ptr10 = getelementptr double, double *%ptr0, i64 20 + + %val0 = load double, double *%ptr0 + %val1 = load double, double *%ptr1 + %val2 = load double, double *%ptr2 + %val3 = load double, double *%ptr3 + %val4 = load double, double *%ptr4 + %val5 = load double, double *%ptr5 + %val6 = load double, double *%ptr6 + %val7 = load double, double *%ptr7 + %val8 = load double, double *%ptr8 + %val9 = load double, double *%ptr9 + %val10 = load double, double *%ptr10 + + %frob0 = fadd double %val0, %val0 + %frob1 = fadd double %val1, %val1 + %frob2 = fadd double %val2, %val2 + %frob3 = fadd double %val3, %val3 + %frob4 = fadd double %val4, %val4 + %frob5 = fadd double %val5, %val5 + %frob6 = fadd double %val6, %val6 + %frob7 = fadd double %val7, %val7 + %frob8 = fadd double %val8, %val8 + %frob9 = fadd double %val9, %val9 + %frob10 = fadd double %val10, %val10 + + store double %frob0, double *%ptr0 + store double %frob1, double *%ptr1 + store double %frob2, double *%ptr2 + store double %frob3, double *%ptr3 + store double %frob4, double *%ptr4 + store double %frob5, double *%ptr5 + store double %frob6, double *%ptr6 + store double %frob7, double *%ptr7 + store double %frob8, double *%ptr8 + store double %frob9, double *%ptr9 + store double %frob10, double *%ptr10 + + %ret = call double @foo() + + %accext0 = fpext double %ret to fp128 + %ext0 = fpext double %frob0 to fp128 + %mul0 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %accext0, fp128 %ext0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %const0 = fpext double 1.01 to fp128 + %extra0 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %mul0, fp128 %const0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc0 = fptrunc fp128 %extra0 to double + + %accext1 = fpext double %trunc0 to fp128 + %ext1 = fpext double %frob1 to fp128 + %mul1 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %accext1, fp128 %ext1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %const1 = fpext double 1.11 to fp128 + %extra1 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %mul1, fp128 %const1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc1 = 
fptrunc fp128 %extra1 to double + + %accext2 = fpext double %trunc1 to fp128 + %ext2 = fpext double %frob2 to fp128 + %mul2 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %accext2, fp128 %ext2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %const2 = fpext double 1.21 to fp128 + %extra2 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %mul2, fp128 %const2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc2 = fptrunc fp128 %extra2 to double + + %accext3 = fpext double %trunc2 to fp128 + %ext3 = fpext double %frob3 to fp128 + %mul3 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %accext3, fp128 %ext3, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %const3 = fpext double 1.31 to fp128 + %extra3 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %mul3, fp128 %const3, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc3 = fptrunc fp128 %extra3 to double + + %accext4 = fpext double %trunc3 to fp128 + %ext4 = fpext double %frob4 to fp128 + %mul4 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %accext4, fp128 %ext4, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %const4 = fpext double 1.41 to fp128 + %extra4 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %mul4, fp128 %const4, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc4 = fptrunc fp128 %extra4 to double + + %accext5 = fpext double %trunc4 to fp128 + %ext5 = fpext double %frob5 to fp128 + %mul5 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %accext5, fp128 %ext5, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %const5 = fpext double 1.51 to fp128 + %extra5 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %mul5, fp128 %const5, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc5 = fptrunc fp128 %extra5 to double + + %accext6 = fpext double %trunc5 to fp128 + %ext6 = fpext double %frob6 to fp128 + %mul6 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %accext6, fp128 %ext6, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %const6 = fpext double 1.61 to fp128 + %extra6 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %mul6, fp128 %const6, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc6 = fptrunc fp128 %extra6 to double + + %accext7 = fpext double %trunc6 to fp128 + %ext7 = fpext double %frob7 to fp128 + %mul7 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %accext7, fp128 %ext7, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %const7 = fpext double 1.71 to fp128 + %extra7 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %mul7, fp128 %const7, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc7 = fptrunc fp128 %extra7 to double + + %accext8 = fpext double %trunc7 to fp128 + %ext8 = fpext double %frob8 to fp128 + %mul8 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %accext8, fp128 %ext8, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %const8 = fpext double 1.81 to fp128 + %extra8 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %mul8, fp128 %const8, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc8 = fptrunc fp128 %extra8 to double + + %accext9 = fpext double %trunc8 to fp128 + %ext9 = fpext double %frob9 to fp128 + %mul9 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %accext9, fp128 %ext9, + metadata 
!"round.dynamic", + metadata !"fpexcept.strict") + %const9 = fpext double 1.91 to fp128 + %extra9 = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %mul9, fp128 %const9, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %trunc9 = fptrunc fp128 %extra9 to double + + ret double %trunc9 +} Index: test/CodeGen/SystemZ/fp-strict-mul-05.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-mul-05.ll +++ test/CodeGen/SystemZ/fp-strict-mul-05.ll @@ -0,0 +1,25 @@ +; Test strict multiplication of two f128s. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare fp128 @llvm.experimental.constrained.fmul.f128(fp128, fp128, metadata, metadata) + +; There is no memory form of 128-bit multiplication. +define void @f1(fp128 *%ptr, float %f2) { +; CHECK-LABEL: f1: +; CHECK-DAG: lxebr %f0, %f0 +; CHECK-DAG: ld %f1, 0(%r2) +; CHECK-DAG: ld %f3, 8(%r2) +; CHECK: mxbr %f0, %f1 +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr + %f2x = fpext float %f2 to fp128 + %diff = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %f1, fp128 %f2x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %diff, fp128 *%ptr + ret void +} Index: test/CodeGen/SystemZ/fp-strict-mul-06.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-mul-06.ll +++ test/CodeGen/SystemZ/fp-strict-mul-06.ll @@ -0,0 +1,137 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s + +declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) + +define float @f1(float %f1, float %f2, float %acc) { +; CHECK-LABEL: f1: +; CHECK-SCALAR: maebr %f4, %f0, %f2 +; CHECK-SCALAR: ler %f0, %f4 +; CHECK-VECTOR: wfmasb %f0, %f0, %f2, %f4 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.fma.f32 ( + float %f1, float %f2, float %acc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define float @f2(float %f1, float *%ptr, float %acc) { +; CHECK-LABEL: f2: +; CHECK: maeb %f2, %f0, 0(%r2) +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 +; CHECK: br %r14 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fma.f32 ( + float %f1, float %f2, float %acc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define float @f3(float %f1, float *%base, float %acc) { +; CHECK-LABEL: f3: +; CHECK: maeb %f2, %f0, 4092(%r2) +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1023 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fma.f32 ( + float %f1, float %f2, float %acc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define float @f4(float %f1, float *%base, float %acc) { +; The important thing here is that we don't generate an out-of-range +; displacement. Other sequences besides this one would be OK. 
+; +; CHECK-LABEL: f4: +; CHECK: aghi %r2, 4096 +; CHECK: maeb %f2, %f0, 0(%r2) +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1024 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fma.f32 ( + float %f1, float %f2, float %acc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define float @f5(float %f1, float *%base, float %acc) { +; Here too the important thing is that we don't generate an out-of-range +; displacement. Other sequences besides this one would be OK. +; +; CHECK-LABEL: f5: +; CHECK: aghi %r2, -4 +; CHECK: maeb %f2, %f0, 0(%r2) +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 -1 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fma.f32 ( + float %f1, float %f2, float %acc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define float @f6(float %f1, float *%base, i64 %index, float %acc) { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r3, 2 +; CHECK: maeb %f2, %f0, 0(%r1,%r2) +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 %index + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fma.f32 ( + float %f1, float %f2, float %acc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define float @f7(float %f1, float *%base, i64 %index, float %acc) { +; CHECK-LABEL: f7: +; CHECK: sllg %r1, %r3, 2 +; CHECK: maeb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}}) +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 +; CHECK: br %r14 + %index2 = add i64 %index, 1023 + %ptr = getelementptr float, float *%base, i64 %index2 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fma.f32 ( + float %f1, float %f2, float %acc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define float @f8(float %f1, float *%base, i64 %index, float %acc) { +; CHECK-LABEL: f8: +; CHECK: sllg %r1, %r3, 2 +; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}}) +; CHECK: maeb %f2, %f0, 0(%r1) +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 +; CHECK: br %r14 + %index2 = add i64 %index, 1024 + %ptr = getelementptr float, float *%base, i64 %index2 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fma.f32 ( + float %f1, float %f2, float %acc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} Index: test/CodeGen/SystemZ/fp-strict-mul-07.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-mul-07.ll +++ test/CodeGen/SystemZ/fp-strict-mul-07.ll @@ -0,0 +1,130 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s + +declare double @llvm.experimental.constrained.fma.f64(double %f1, double %f2, double %f3, metadata, metadata) + +define double @f1(double %f1, double %f2, double %acc) { +; CHECK-LABEL: f1: +; CHECK-SCALAR: madbr %f4, %f0, %f2 +; CHECK-SCALAR: ldr %f0, %f4 +; CHECK-VECTOR: wfmadb %f0, %f0, %f2, %f4 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.fma.f64 ( + double %f1, double %f2, double %acc, + metadata 
!"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +define double @f2(double %f1, double *%ptr, double %acc) { +; CHECK-LABEL: f2: +; CHECK: madb %f2, %f0, 0(%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fma.f64 ( + double %f1, double %f2, double %acc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +define double @f3(double %f1, double *%base, double %acc) { +; CHECK-LABEL: f3: +; CHECK: madb %f2, %f0, 4088(%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 511 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fma.f64 ( + double %f1, double %f2, double %acc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +define double @f4(double %f1, double *%base, double %acc) { +; The important thing here is that we don't generate an out-of-range +; displacement. Other sequences besides this one would be OK. +; +; CHECK-LABEL: f4: +; CHECK: aghi %r2, 4096 +; CHECK: madb %f2, %f0, 0(%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 512 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fma.f64 ( + double %f1, double %f2, double %acc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +define double @f5(double %f1, double *%base, double %acc) { +; Here too the important thing is that we don't generate an out-of-range +; displacement. Other sequences besides this one would be OK. +; +; CHECK-LABEL: f5: +; CHECK: aghi %r2, -8 +; CHECK: madb %f2, %f0, 0(%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 -1 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fma.f64 ( + double %f1, double %f2, double %acc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +define double @f6(double %f1, double *%base, i64 %index, double %acc) { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r3, 3 +; CHECK: madb %f2, %f0, 0(%r1,%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 %index + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fma.f64 ( + double %f1, double %f2, double %acc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +define double @f7(double %f1, double *%base, i64 %index, double %acc) { +; CHECK-LABEL: f7: +; CHECK: sllg %r1, %r3, 3 +; CHECK: madb %f2, %f0, 4088({{%r1,%r2|%r2,%r1}}) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %index2 = add i64 %index, 511 + %ptr = getelementptr double, double *%base, i64 %index2 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fma.f64 ( + double %f1, double %f2, double %acc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +define double @f8(double %f1, double *%base, i64 %index, double %acc) { +; CHECK-LABEL: f8: +; CHECK: sllg %r1, %r3, 3 +; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}}) +; CHECK: madb %f2, %f0, 0(%r1) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %index2 = add i64 %index, 512 + %ptr = getelementptr double, double *%base, i64 %index2 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fma.f64 ( + double %f1, double %f2, double %acc, + metadata !"round.dynamic", + metadata 
!"fpexcept.strict") + ret double %res +} Index: test/CodeGen/SystemZ/fp-strict-mul-08.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-mul-08.ll +++ test/CodeGen/SystemZ/fp-strict-mul-08.ll @@ -0,0 +1,145 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s + +declare float @llvm.experimental.constrained.fma.f32(float %f1, float %f2, float %f3, metadata, metadata) + +define float @f1(float %f1, float %f2, float %acc) { +; CHECK-LABEL: f1: +; CHECK-SCALAR: msebr %f4, %f0, %f2 +; CHECK-SCALAR: ler %f0, %f4 +; CHECK-VECTOR: wfmssb %f0, %f0, %f2, %f4 +; CHECK: br %r14 + %negacc = fsub float -0.0, %acc + %res = call float @llvm.experimental.constrained.fma.f32 ( + float %f1, float %f2, float %negacc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define float @f2(float %f1, float *%ptr, float %acc) { +; CHECK-LABEL: f2: +; CHECK: mseb %f2, %f0, 0(%r2) +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 +; CHECK: br %r14 + %f2 = load float, float *%ptr + %negacc = fsub float -0.0, %acc + %res = call float @llvm.experimental.constrained.fma.f32 ( + float %f1, float %f2, float %negacc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define float @f3(float %f1, float *%base, float %acc) { +; CHECK-LABEL: f3: +; CHECK: mseb %f2, %f0, 4092(%r2) +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1023 + %f2 = load float, float *%ptr + %negacc = fsub float -0.0, %acc + %res = call float @llvm.experimental.constrained.fma.f32 ( + float %f1, float %f2, float %negacc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define float @f4(float %f1, float *%base, float %acc) { +; The important thing here is that we don't generate an out-of-range +; displacement. Other sequences besides this one would be OK. +; +; CHECK-LABEL: f4: +; CHECK: aghi %r2, 4096 +; CHECK: mseb %f2, %f0, 0(%r2) +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1024 + %f2 = load float, float *%ptr + %negacc = fsub float -0.0, %acc + %res = call float @llvm.experimental.constrained.fma.f32 ( + float %f1, float %f2, float %negacc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define float @f5(float %f1, float *%base, float %acc) { +; Here too the important thing is that we don't generate an out-of-range +; displacement. Other sequences besides this one would be OK. 
+; +; CHECK-LABEL: f5: +; CHECK: aghi %r2, -4 +; CHECK: mseb %f2, %f0, 0(%r2) +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 -1 + %f2 = load float, float *%ptr + %negacc = fsub float -0.0, %acc + %res = call float @llvm.experimental.constrained.fma.f32 ( + float %f1, float %f2, float %negacc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define float @f6(float %f1, float *%base, i64 %index, float %acc) { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r3, 2 +; CHECK: mseb %f2, %f0, 0(%r1,%r2) +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 %index + %f2 = load float, float *%ptr + %negacc = fsub float -0.0, %acc + %res = call float @llvm.experimental.constrained.fma.f32 ( + float %f1, float %f2, float %negacc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define float @f7(float %f1, float *%base, i64 %index, float %acc) { +; CHECK-LABEL: f7: +; CHECK: sllg %r1, %r3, 2 +; CHECK: mseb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}}) +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 +; CHECK: br %r14 + %index2 = add i64 %index, 1023 + %ptr = getelementptr float, float *%base, i64 %index2 + %f2 = load float, float *%ptr + %negacc = fsub float -0.0, %acc + %res = call float @llvm.experimental.constrained.fma.f32 ( + float %f1, float %f2, float %negacc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define float @f8(float %f1, float *%base, i64 %index, float %acc) { +; CHECK-LABEL: f8: +; CHECK: sllg %r1, %r3, 2 +; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}}) +; CHECK: mseb %f2, %f0, 0(%r1) +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 +; CHECK: br %r14 + %index2 = add i64 %index, 1024 + %ptr = getelementptr float, float *%base, i64 %index2 + %f2 = load float, float *%ptr + %negacc = fsub float -0.0, %acc + %res = call float @llvm.experimental.constrained.fma.f32 ( + float %f1, float %f2, float %negacc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} Index: test/CodeGen/SystemZ/fp-strict-mul-09.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-mul-09.ll +++ test/CodeGen/SystemZ/fp-strict-mul-09.ll @@ -0,0 +1,138 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s + +declare double @llvm.experimental.constrained.fma.f64(double %f1, double %f2, double %f3, metadata, metadata) + +define double @f1(double %f1, double %f2, double %acc) { +; CHECK-LABEL: f1: +; CHECK-SCALAR: msdbr %f4, %f0, %f2 +; CHECK-SCALAR: ldr %f0, %f4 +; CHECK-VECTOR: wfmsdb %f0, %f0, %f2, %f4 +; CHECK: br %r14 + %negacc = fsub double -0.0, %acc + %res = call double @llvm.experimental.constrained.fma.f64 ( + double %f1, double %f2, double %negacc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +define double @f2(double %f1, double *%ptr, double %acc) { +; CHECK-LABEL: f2: +; CHECK: msdb %f2, %f0, 0(%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %f2 = load double, double *%ptr + %negacc = fsub double -0.0, %acc + %res = call double @llvm.experimental.constrained.fma.f64 ( + double %f1, double %f2, double %negacc, + metadata !"round.dynamic", + 
metadata !"fpexcept.strict") + ret double %res +} + +define double @f3(double %f1, double *%base, double %acc) { +; CHECK-LABEL: f3: +; CHECK: msdb %f2, %f0, 4088(%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 511 + %f2 = load double, double *%ptr + %negacc = fsub double -0.0, %acc + %res = call double @llvm.experimental.constrained.fma.f64 ( + double %f1, double %f2, double %negacc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +define double @f4(double %f1, double *%base, double %acc) { +; The important thing here is that we don't generate an out-of-range +; displacement. Other sequences besides this one would be OK. +; +; CHECK-LABEL: f4: +; CHECK: aghi %r2, 4096 +; CHECK: msdb %f2, %f0, 0(%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 512 + %f2 = load double, double *%ptr + %negacc = fsub double -0.0, %acc + %res = call double @llvm.experimental.constrained.fma.f64 ( + double %f1, double %f2, double %negacc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +define double @f5(double %f1, double *%base, double %acc) { +; Here too the important thing is that we don't generate an out-of-range +; displacement. Other sequences besides this one would be OK. +; +; CHECK-LABEL: f5: +; CHECK: aghi %r2, -8 +; CHECK: msdb %f2, %f0, 0(%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 -1 + %f2 = load double, double *%ptr + %negacc = fsub double -0.0, %acc + %res = call double @llvm.experimental.constrained.fma.f64 ( + double %f1, double %f2, double %negacc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +define double @f6(double %f1, double *%base, i64 %index, double %acc) { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r3, 3 +; CHECK: msdb %f2, %f0, 0(%r1,%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 %index + %f2 = load double, double *%ptr + %negacc = fsub double -0.0, %acc + %res = call double @llvm.experimental.constrained.fma.f64 ( + double %f1, double %f2, double %negacc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +define double @f7(double %f1, double *%base, i64 %index, double %acc) { +; CHECK-LABEL: f7: +; CHECK: sllg %r1, %r3, 3 +; CHECK: msdb %f2, %f0, 4088({{%r1,%r2|%r2,%r1}}) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %index2 = add i64 %index, 511 + %ptr = getelementptr double, double *%base, i64 %index2 + %f2 = load double, double *%ptr + %negacc = fsub double -0.0, %acc + %res = call double @llvm.experimental.constrained.fma.f64 ( + double %f1, double %f2, double %negacc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +define double @f8(double %f1, double *%base, i64 %index, double %acc) { +; CHECK-LABEL: f8: +; CHECK: sllg %r1, %r3, 3 +; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}}) +; CHECK: msdb %f2, %f0, 0(%r1) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %index2 = add i64 %index, 512 + %ptr = getelementptr double, double *%base, i64 %index2 + %f2 = load double, double *%ptr + %negacc = fsub double -0.0, %acc + %res = call double @llvm.experimental.constrained.fma.f64 ( + double %f1, double %f2, double %negacc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} Index: test/CodeGen/SystemZ/fp-strict-mul-10.ll =================================================================== --- 
test/CodeGen/SystemZ/fp-strict-mul-10.ll +++ test/CodeGen/SystemZ/fp-strict-mul-10.ll @@ -0,0 +1,55 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare double @llvm.experimental.constrained.fma.f64(double %f1, double %f2, double %f3, metadata, metadata) +declare float @llvm.experimental.constrained.fma.f32(float %f1, float %f2, float %f3, metadata, metadata) + +define double @f1(double %f1, double %f2, double %acc) { +; CHECK-LABEL: f1: +; CHECK: wfnmadb %f0, %f0, %f2, %f4 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.fma.f64 ( + double %f1, double %f2, double %acc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %negres = fsub double -0.0, %res + ret double %negres +} + +define double @f2(double %f1, double %f2, double %acc) { +; CHECK-LABEL: f2: +; CHECK: wfnmsdb %f0, %f0, %f2, %f4 +; CHECK: br %r14 + %negacc = fsub double -0.0, %acc + %res = call double @llvm.experimental.constrained.fma.f64 ( + double %f1, double %f2, double %negacc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %negres = fsub double -0.0, %res + ret double %negres +} + +define float @f3(float %f1, float %f2, float %acc) { +; CHECK-LABEL: f3: +; CHECK: wfnmasb %f0, %f0, %f2, %f4 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.fma.f32 ( + float %f1, float %f2, float %acc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %negres = fsub float -0.0, %res + ret float %negres +} + +define float @f4(float %f1, float %f2, float %acc) { +; CHECK-LABEL: f4: +; CHECK: wfnmssb %f0, %f0, %f2, %f4 +; CHECK: br %r14 + %negacc = fsub float -0.0, %acc + %res = call float @llvm.experimental.constrained.fma.f32 ( + float %f1, float %f2, float %negacc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %negres = fsub float -0.0, %res + ret float %negres +} + Index: test/CodeGen/SystemZ/fp-strict-mul-11.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-mul-11.ll +++ test/CodeGen/SystemZ/fp-strict-mul-11.ll @@ -0,0 +1,40 @@ +; Test strict 128-bit floating-point multiplication on z14. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare fp128 @llvm.experimental.constrained.fmul.f128(fp128, fp128, metadata, metadata) + +define void @f1(fp128 *%ptr1, fp128 *%ptr2) { +; CHECK-LABEL: f1: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK: wfmxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr1 + %f2 = load fp128, fp128 *%ptr2 + %sum = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %f1, fp128 %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %sum, fp128 *%ptr1 + ret void +} + +define void @f2(double %f1, double %f2, fp128 *%dst) { +; CHECK-LABEL: f2: +; CHECK-DAG: wflld [[REG1:%v[0-9]+]], %f0 +; CHECK-DAG: wflld [[REG2:%v[0-9]+]], %f2 +; CHECK: wfmxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %f1x = fpext double %f1 to fp128 + %f2x = fpext double %f2 to fp128 + %res = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %f1x, fp128 %f2x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%dst + ret void +} + Index: test/CodeGen/SystemZ/fp-strict-round-01.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-round-01.ll +++ test/CodeGen/SystemZ/fp-strict-round-01.ll @@ -0,0 +1,250 @@ +; Test strict rounding functions for z10. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +; Test rint for f32. +declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata) +define float @f1(float %f) { +; CHECK-LABEL: f1: +; CHECK: fiebr %f0, 0, %f0 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.rint.f32( + float %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Test rint for f64. +declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata) +define double @f2(double %f) { +; CHECK-LABEL: f2: +; CHECK: fidbr %f0, 0, %f0 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.rint.f64( + double %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Test rint for f128. +declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata) +define void @f3(fp128 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: fixbr %f0, 0, %f0 +; CHECK: br %r14 + %src = load fp128, fp128 *%ptr + %res = call fp128 @llvm.experimental.constrained.rint.f128( + fp128 %src, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%ptr + ret void +} + +; Test nearbyint for f32. +declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata) +define float @f4(float %f) { +; CHECK-LABEL: f4: +; CHECK: brasl %r14, nearbyintf@PLT +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.nearbyint.f32( + float %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Test nearbyint for f64. +declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) +define double @f5(double %f) { +; CHECK-LABEL: f5: +; CHECK: brasl %r14, nearbyint@PLT +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.nearbyint.f64( + double %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Test nearbyint for f128. 
+declare fp128 @llvm.experimental.constrained.nearbyint.f128(fp128, metadata, metadata) +define void @f6(fp128 *%ptr) { +; CHECK-LABEL: f6: +; CHECK: brasl %r14, nearbyintl@PLT +; CHECK: br %r14 + %src = load fp128, fp128 *%ptr + %res = call fp128 @llvm.experimental.constrained.nearbyint.f128( + fp128 %src, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%ptr + ret void +} + +; Test floor for f32. +declare float @llvm.experimental.constrained.floor.f32(float, metadata, metadata) +define float @f7(float %f) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, floorf@PLT +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.floor.f32( + float %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Test floor for f64. +declare double @llvm.experimental.constrained.floor.f64(double, metadata, metadata) +define double @f8(double %f) { +; CHECK-LABEL: f8: +; CHECK: brasl %r14, floor@PLT +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.floor.f64( + double %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Test floor for f128. +declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata, metadata) +define void @f9(fp128 *%ptr) { +; CHECK-LABEL: f9: +; CHECK: brasl %r14, floorl@PLT +; CHECK: br %r14 + %src = load fp128, fp128 *%ptr + %res = call fp128 @llvm.experimental.constrained.floor.f128( + fp128 %src, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%ptr + ret void +} + +; Test ceil for f32. +declare float @llvm.experimental.constrained.ceil.f32(float, metadata, metadata) +define float @f10(float %f) { +; CHECK-LABEL: f10: +; CHECK: brasl %r14, ceilf@PLT +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.ceil.f32( + float %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Test ceil for f64. +declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata) +define double @f11(double %f) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, ceil@PLT +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.ceil.f64( + double %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Test ceil for f128. +declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata, metadata) +define void @f12(fp128 *%ptr) { +; CHECK-LABEL: f12: +; CHECK: brasl %r14, ceill@PLT +; CHECK: br %r14 + %src = load fp128, fp128 *%ptr + %res = call fp128 @llvm.experimental.constrained.ceil.f128( + fp128 %src, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%ptr + ret void +} + +; Test trunc for f32. +declare float @llvm.experimental.constrained.trunc.f32(float, metadata, metadata) +define float @f13(float %f) { +; CHECK-LABEL: f13: +; CHECK: brasl %r14, truncf@PLT +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.trunc.f32( + float %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Test trunc for f64. +declare double @llvm.experimental.constrained.trunc.f64(double, metadata, metadata) +define double @f14(double %f) { +; CHECK-LABEL: f14: +; CHECK: brasl %r14, trunc@PLT +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.trunc.f64( + double %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Test trunc for f128. 
+declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata, metadata) +define void @f15(fp128 *%ptr) { +; CHECK-LABEL: f15: +; CHECK: brasl %r14, truncl@PLT +; CHECK: br %r14 + %src = load fp128, fp128 *%ptr + %res = call fp128 @llvm.experimental.constrained.trunc.f128( + fp128 %src, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%ptr + ret void +} + +; Test round for f32. +declare float @llvm.experimental.constrained.round.f32(float, metadata, metadata) +define float @f16(float %f) { +; CHECK-LABEL: f16: +; CHECK: brasl %r14, roundf@PLT +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.round.f32( + float %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Test round for f64. +declare double @llvm.experimental.constrained.round.f64(double, metadata, metadata) +define double @f17(double %f) { +; CHECK-LABEL: f17: +; CHECK: brasl %r14, round@PLT +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.round.f64( + double %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Test round for f128. +declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata, metadata) +define void @f18(fp128 *%ptr) { +; CHECK-LABEL: f18: +; CHECK: brasl %r14, roundl@PLT +; CHECK: br %r14 + %src = load fp128, fp128 *%ptr + %res = call fp128 @llvm.experimental.constrained.round.f128( + fp128 %src, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%ptr + ret void +} + Index: test/CodeGen/SystemZ/fp-strict-round-02.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-round-02.ll +++ test/CodeGen/SystemZ/fp-strict-round-02.ll @@ -0,0 +1,254 @@ +; Test strict rounding functions for z196 and above. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s + +; Test rint for f32. +declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata) +define float @f1(float %f) { +; CHECK-LABEL: f1: +; CHECK: fiebr %f0, 0, %f0 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.rint.f32( + float %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Test rint for f64. +declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata) +define double @f2(double %f) { +; CHECK-LABEL: f2: +; CHECK-SCALAR: fidbr %f0, 0, %f0 +; CHECK-VECTOR: fidbra %f0, 0, %f0, 0 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.rint.f64( + double %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Test rint for f128. +declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata) +define void @f3(fp128 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: fixbr %f0, 0, %f0 +; CHECK: br %r14 + %src = load fp128, fp128 *%ptr + %res = call fp128 @llvm.experimental.constrained.rint.f128( + fp128 %src, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%ptr + ret void +} + +; Test nearbyint for f32. 
+declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata) +define float @f4(float %f) { +; CHECK-LABEL: f4: +; CHECK: fiebra %f0, 0, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.nearbyint.f32( + float %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Test nearbyint for f64. +declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) +define double @f5(double %f) { +; CHECK-LABEL: f5: +; CHECK: fidbra %f0, 0, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.nearbyint.f64( + double %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Test nearbyint for f128. +declare fp128 @llvm.experimental.constrained.nearbyint.f128(fp128, metadata, metadata) +define void @f6(fp128 *%ptr) { +; CHECK-LABEL: f6: +; CHECK: fixbra %f0, 0, %f0, 4 +; CHECK: br %r14 + %src = load fp128, fp128 *%ptr + %res = call fp128 @llvm.experimental.constrained.nearbyint.f128( + fp128 %src, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%ptr + ret void +} + +; Test floor for f32. +declare float @llvm.experimental.constrained.floor.f32(float, metadata, metadata) +define float @f7(float %f) { +; CHECK-LABEL: f7: +; CHECK: fiebra %f0, 7, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.floor.f32( + float %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Test floor for f64. +declare double @llvm.experimental.constrained.floor.f64(double, metadata, metadata) +define double @f8(double %f) { +; CHECK-LABEL: f8: +; CHECK: fidbra %f0, 7, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.floor.f64( + double %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Test floor for f128. +declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata, metadata) +define void @f9(fp128 *%ptr) { +; CHECK-LABEL: f9: +; CHECK: fixbra %f0, 7, %f0, 4 +; CHECK: br %r14 + %src = load fp128, fp128 *%ptr + %res = call fp128 @llvm.experimental.constrained.floor.f128( + fp128 %src, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%ptr + ret void +} + +; Test ceil for f32. +declare float @llvm.experimental.constrained.ceil.f32(float, metadata, metadata) +define float @f10(float %f) { +; CHECK-LABEL: f10: +; CHECK: fiebra %f0, 6, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.ceil.f32( + float %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Test ceil for f64. +declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata) +define double @f11(double %f) { +; CHECK-LABEL: f11: +; CHECK: fidbra %f0, 6, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.ceil.f64( + double %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Test ceil for f128. +declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata, metadata) +define void @f12(fp128 *%ptr) { +; CHECK-LABEL: f12: +; CHECK: fixbra %f0, 6, %f0, 4 +; CHECK: br %r14 + %src = load fp128, fp128 *%ptr + %res = call fp128 @llvm.experimental.constrained.ceil.f128( + fp128 %src, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%ptr + ret void +} + +; Test trunc for f32. 
+declare float @llvm.experimental.constrained.trunc.f32(float, metadata, metadata) +define float @f13(float %f) { +; CHECK-LABEL: f13: +; CHECK: fiebra %f0, 5, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.trunc.f32( + float %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Test trunc for f64. +declare double @llvm.experimental.constrained.trunc.f64(double, metadata, metadata) +define double @f14(double %f) { +; CHECK-LABEL: f14: +; CHECK: fidbra %f0, 5, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.trunc.f64( + double %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Test trunc for f128. +declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata, metadata) +define void @f15(fp128 *%ptr) { +; CHECK-LABEL: f15: +; CHECK: fixbra %f0, 5, %f0, 4 +; CHECK: br %r14 + %src = load fp128, fp128 *%ptr + %res = call fp128 @llvm.experimental.constrained.trunc.f128( + fp128 %src, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%ptr + ret void +} + +; Test round for f32. +declare float @llvm.experimental.constrained.round.f32(float, metadata, metadata) +define float @f16(float %f) { +; CHECK-LABEL: f16: +; CHECK: fiebra %f0, 1, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.round.f32( + float %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Test round for f64. +declare double @llvm.experimental.constrained.round.f64(double, metadata, metadata) +define double @f17(double %f) { +; CHECK-LABEL: f17: +; CHECK: fidbra %f0, 1, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.round.f64( + double %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Test round for f128. +declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata, metadata) +define void @f18(fp128 *%ptr) { +; CHECK-LABEL: f18: +; CHECK: fixbra %f0, 1, %f0, 4 +; CHECK: br %r14 + %src = load fp128, fp128 *%ptr + %res = call fp128 @llvm.experimental.constrained.round.f128( + fp128 %src, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%ptr + ret void +} + Index: test/CodeGen/SystemZ/fp-strict-round-03.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-round-03.ll +++ test/CodeGen/SystemZ/fp-strict-round-03.ll @@ -0,0 +1,262 @@ +; Test strict rounding functions for z14 and above. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test rint for f32. +declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata) +define float @f1(float %f) { +; CHECK-LABEL: f1: +; CHECK: fiebra %f0, 0, %f0, 0 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.rint.f32( + float %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Test rint for f64. +declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata) +define double @f2(double %f) { +; CHECK-LABEL: f2: +; CHECK: fidbra %f0, 0, %f0, 0 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.rint.f64( + double %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Test rint for f128. 
+declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata) +define void @f3(fp128 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: vl [[REG:%v[0-9]+]], 0(%r2) +; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 0, 0 +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %src = load fp128, fp128 *%ptr + %res = call fp128 @llvm.experimental.constrained.rint.f128( + fp128 %src, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%ptr + ret void +} + +; Test nearbyint for f32. +declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata) +define float @f4(float %f) { +; CHECK-LABEL: f4: +; CHECK: fiebra %f0, 0, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.nearbyint.f32( + float %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Test nearbyint for f64. +declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) +define double @f5(double %f) { +; CHECK-LABEL: f5: +; CHECK: fidbra %f0, 0, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.nearbyint.f64( + double %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Test nearbyint for f128. +declare fp128 @llvm.experimental.constrained.nearbyint.f128(fp128, metadata, metadata) +define void @f6(fp128 *%ptr) { +; CHECK-LABEL: f6: +; CHECK: vl [[REG:%v[0-9]+]], 0(%r2) +; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 0 +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %src = load fp128, fp128 *%ptr + %res = call fp128 @llvm.experimental.constrained.nearbyint.f128( + fp128 %src, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%ptr + ret void +} + +; Test floor for f32. +declare float @llvm.experimental.constrained.floor.f32(float, metadata, metadata) +define float @f7(float %f) { +; CHECK-LABEL: f7: +; CHECK: fiebra %f0, 7, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.floor.f32( + float %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Test floor for f64. +declare double @llvm.experimental.constrained.floor.f64(double, metadata, metadata) +define double @f8(double %f) { +; CHECK-LABEL: f8: +; CHECK: fidbra %f0, 7, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.floor.f64( + double %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Test floor for f128. +declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata, metadata) +define void @f9(fp128 *%ptr) { +; CHECK-LABEL: f9: +; CHECK: vl [[REG:%v[0-9]+]], 0(%r2) +; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 7 +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %src = load fp128, fp128 *%ptr + %res = call fp128 @llvm.experimental.constrained.floor.f128( + fp128 %src, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%ptr + ret void +} + +; Test ceil for f32. +declare float @llvm.experimental.constrained.ceil.f32(float, metadata, metadata) +define float @f10(float %f) { +; CHECK-LABEL: f10: +; CHECK: fiebra %f0, 6, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.ceil.f32( + float %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Test ceil for f64. 
+declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata) +define double @f11(double %f) { +; CHECK-LABEL: f11: +; CHECK: fidbra %f0, 6, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.ceil.f64( + double %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Test ceil for f128. +declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata, metadata) +define void @f12(fp128 *%ptr) { +; CHECK-LABEL: f12: +; CHECK: vl [[REG:%v[0-9]+]], 0(%r2) +; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 6 +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %src = load fp128, fp128 *%ptr + %res = call fp128 @llvm.experimental.constrained.ceil.f128( + fp128 %src, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%ptr + ret void +} + +; Test trunc for f32. +declare float @llvm.experimental.constrained.trunc.f32(float, metadata, metadata) +define float @f13(float %f) { +; CHECK-LABEL: f13: +; CHECK: fiebra %f0, 5, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.trunc.f32( + float %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Test trunc for f64. +declare double @llvm.experimental.constrained.trunc.f64(double, metadata, metadata) +define double @f14(double %f) { +; CHECK-LABEL: f14: +; CHECK: fidbra %f0, 5, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.trunc.f64( + double %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Test trunc for f128. +declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata, metadata) +define void @f15(fp128 *%ptr) { +; CHECK-LABEL: f15: +; CHECK: vl [[REG:%v[0-9]+]], 0(%r2) +; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 5 +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %src = load fp128, fp128 *%ptr + %res = call fp128 @llvm.experimental.constrained.trunc.f128( + fp128 %src, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%ptr + ret void +} + +; Test round for f32. +declare float @llvm.experimental.constrained.round.f32(float, metadata, metadata) +define float @f16(float %f) { +; CHECK-LABEL: f16: +; CHECK: fiebra %f0, 1, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.round.f32( + float %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Test round for f64. +declare double @llvm.experimental.constrained.round.f64(double, metadata, metadata) +define double @f17(double %f) { +; CHECK-LABEL: f17: +; CHECK: fidbra %f0, 1, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.round.f64( + double %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Test round for f128. 
+declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata, metadata) +define void @f18(fp128 *%ptr) { +; CHECK-LABEL: f18: +; CHECK: vl [[REG:%v[0-9]+]], 0(%r2) +; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 1 +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %src = load fp128, fp128 *%ptr + %res = call fp128 @llvm.experimental.constrained.round.f128( + fp128 %src, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%ptr + ret void +} + Index: test/CodeGen/SystemZ/fp-strict-sqrt-01.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-sqrt-01.ll +++ test/CodeGen/SystemZ/fp-strict-sqrt-01.ll @@ -0,0 +1,94 @@ +; Test strict 32-bit square root. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) + +; Check register square root. +define float @f1(float %val) { +; CHECK-LABEL: f1: +; CHECK: sqebr %f0, %f0 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.sqrt.f32( + float %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check the low end of the SQEB range. +define float @f2(float *%ptr) { +; CHECK-LABEL: f2: +; CHECK: sqeb %f0, 0(%r2) +; CHECK: br %r14 + %val = load float, float *%ptr + %res = call float @llvm.experimental.constrained.sqrt.f32( + float %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check the high end of the aligned SQEB range. +define float @f3(float *%base) { +; CHECK-LABEL: f3: +; CHECK: sqeb %f0, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1023 + %val = load float, float *%ptr + %res = call float @llvm.experimental.constrained.sqrt.f32( + float %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define float @f4(float *%base) { +; CHECK-LABEL: f4: +; CHECK: aghi %r2, 4096 +; CHECK: sqeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1024 + %val = load float, float *%ptr + %res = call float @llvm.experimental.constrained.sqrt.f32( + float %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check negative displacements, which also need separate address logic. +define float @f5(float *%base) { +; CHECK-LABEL: f5: +; CHECK: aghi %r2, -4 +; CHECK: sqeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 -1 + %val = load float, float *%ptr + %res = call float @llvm.experimental.constrained.sqrt.f32( + float %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check that SQEB allows indices. 
+define float @f6(float *%base, i64 %index) { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r3, 2 +; CHECK: sqeb %f0, 400(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr float, float *%base, i64 %index + %ptr2 = getelementptr float, float *%ptr1, i64 100 + %val = load float, float *%ptr2 + %res = call float @llvm.experimental.constrained.sqrt.f32( + float %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + Index: test/CodeGen/SystemZ/fp-strict-sqrt-02.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-sqrt-02.ll +++ test/CodeGen/SystemZ/fp-strict-sqrt-02.ll @@ -0,0 +1,94 @@ +; Test strict 64-bit square root. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata) + +; Check register square root. +define double @f1(double %val) { +; CHECK-LABEL: f1: +; CHECK: sqdbr %f0, %f0 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.sqrt.f64( + double %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check the low end of the SQDB range. +define double @f2(double *%ptr) { +; CHECK-LABEL: f2: +; CHECK: sqdb %f0, 0(%r2) +; CHECK: br %r14 + %val = load double, double *%ptr + %res = call double @llvm.experimental.constrained.sqrt.f64( + double %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check the high end of the aligned SQDB range. +define double @f3(double *%base) { +; CHECK-LABEL: f3: +; CHECK: sqdb %f0, 4088(%r2) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 511 + %val = load double, double *%ptr + %res = call double @llvm.experimental.constrained.sqrt.f64( + double %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define double @f4(double *%base) { +; CHECK-LABEL: f4: +; CHECK: aghi %r2, 4096 +; CHECK: sqdb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 512 + %val = load double, double *%ptr + %res = call double @llvm.experimental.constrained.sqrt.f64( + double %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check negative displacements, which also need separate address logic. +define double @f5(double *%base) { +; CHECK-LABEL: f5: +; CHECK: aghi %r2, -8 +; CHECK: sqdb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 -1 + %val = load double, double *%ptr + %res = call double @llvm.experimental.constrained.sqrt.f64( + double %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check that SQDB allows indices. 
+define double @f6(double *%base, i64 %index) { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r3, 3 +; CHECK: sqdb %f0, 800(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr double, double *%base, i64 %index + %ptr2 = getelementptr double, double *%ptr1, i64 100 + %val = load double, double *%ptr2 + %res = call double @llvm.experimental.constrained.sqrt.f64( + double %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + Index: test/CodeGen/SystemZ/fp-strict-sqrt-03.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-sqrt-03.ll +++ test/CodeGen/SystemZ/fp-strict-sqrt-03.ll @@ -0,0 +1,23 @@ +; Test strict 128-bit square root. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata) + +; There's no memory form of SQXBR. +define void @f1(fp128 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: ld %f0, 0(%r2) +; CHECK: ld %f2, 8(%r2) +; CHECK: sqxbr %f0, %f0 +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %orig = load fp128, fp128 *%ptr + %sqrt = call fp128 @llvm.experimental.constrained.sqrt.f128( + fp128 %orig, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %sqrt, fp128 *%ptr + ret void +} Index: test/CodeGen/SystemZ/fp-strict-sqrt-04.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-sqrt-04.ll +++ test/CodeGen/SystemZ/fp-strict-sqrt-04.ll @@ -0,0 +1,20 @@ +; Test strict 128-bit floating-point square root on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata) + +define void @f1(fp128 *%ptr) { +; CHECK-LABEL: f1: +; CHECK-DAG: vl [[REG:%v[0-9]+]], 0(%r2) +; CHECK: wfsqxb [[RES:%v[0-9]+]], [[REG]] +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %f = load fp128, fp128 *%ptr + %res = call fp128 @llvm.experimental.constrained.sqrt.f128( + fp128 %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128 *%ptr + ret void +} Index: test/CodeGen/SystemZ/fp-strict-sub-01.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-sub-01.ll +++ test/CodeGen/SystemZ/fp-strict-sub-01.ll @@ -0,0 +1,173 @@ +; Test 32-bit floating-point strict subtraction. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare float @foo() +declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) + +; Check register subtraction. +define float @f1(float %f1, float %f2) { +; CHECK-LABEL: f1: +; CHECK: sebr %f0, %f2 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.fsub.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check the low end of the SEB range. +define float @f2(float %f1, float *%ptr) { +; CHECK-LABEL: f2: +; CHECK: seb %f0, 0(%r2) +; CHECK: br %r14 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fsub.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check the high end of the aligned SEB range. 
+define float @f3(float %f1, float *%base) { +; CHECK-LABEL: f3: +; CHECK: seb %f0, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1023 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fsub.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define float @f4(float %f1, float *%base) { +; CHECK-LABEL: f4: +; CHECK: aghi %r2, 4096 +; CHECK: seb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1024 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fsub.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check negative displacements, which also need separate address logic. +define float @f5(float %f1, float *%base) { +; CHECK-LABEL: f5: +; CHECK: aghi %r2, -4 +; CHECK: seb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 -1 + %f2 = load float, float *%ptr + %res = call float @llvm.experimental.constrained.fsub.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check that SEB allows indices. +define float @f6(float %f1, float *%base, i64 %index) { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r3, 2 +; CHECK: seb %f0, 400(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr float, float *%base, i64 %index + %ptr2 = getelementptr float, float *%ptr1, i64 100 + %f2 = load float, float *%ptr2 + %res = call float @llvm.experimental.constrained.fsub.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +; Check that subtractions of spilled values can use SEB rather than SEBR. 
+define float @f7(float *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK-SCALAR: seb %f0, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr float, float *%ptr0, i64 2 + %ptr2 = getelementptr float, float *%ptr0, i64 4 + %ptr3 = getelementptr float, float *%ptr0, i64 6 + %ptr4 = getelementptr float, float *%ptr0, i64 8 + %ptr5 = getelementptr float, float *%ptr0, i64 10 + %ptr6 = getelementptr float, float *%ptr0, i64 12 + %ptr7 = getelementptr float, float *%ptr0, i64 14 + %ptr8 = getelementptr float, float *%ptr0, i64 16 + %ptr9 = getelementptr float, float *%ptr0, i64 18 + %ptr10 = getelementptr float, float *%ptr0, i64 20 + + %val0 = load float, float *%ptr0 + %val1 = load float, float *%ptr1 + %val2 = load float, float *%ptr2 + %val3 = load float, float *%ptr3 + %val4 = load float, float *%ptr4 + %val5 = load float, float *%ptr5 + %val6 = load float, float *%ptr6 + %val7 = load float, float *%ptr7 + %val8 = load float, float *%ptr8 + %val9 = load float, float *%ptr9 + %val10 = load float, float *%ptr10 + + %ret = call float @foo() + + %sub0 = call float @llvm.experimental.constrained.fsub.f32( + float %ret, float %val0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub1 = call float @llvm.experimental.constrained.fsub.f32( + float %sub0, float %val1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub2 = call float @llvm.experimental.constrained.fsub.f32( + float %sub1, float %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub3 = call float @llvm.experimental.constrained.fsub.f32( + float %sub2, float %val3, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub4 = call float @llvm.experimental.constrained.fsub.f32( + float %sub3, float %val4, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub5 = call float @llvm.experimental.constrained.fsub.f32( + float %sub4, float %val5, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub6 = call float @llvm.experimental.constrained.fsub.f32( + float %sub5, float %val6, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub7 = call float @llvm.experimental.constrained.fsub.f32( + float %sub6, float %val7, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub8 = call float @llvm.experimental.constrained.fsub.f32( + float %sub7, float %val8, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub9 = call float @llvm.experimental.constrained.fsub.f32( + float %sub8, float %val9, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub10 = call float @llvm.experimental.constrained.fsub.f32( + float %sub9, float %val10, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + ret float %sub10 +} Index: test/CodeGen/SystemZ/fp-strict-sub-02.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-sub-02.ll +++ test/CodeGen/SystemZ/fp-strict-sub-02.ll @@ -0,0 +1,173 @@ +; Test strict 64-bit floating-point subtraction. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare double @foo() +declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata) + +; Check register subtraction. 
+define double @f1(double %f1, double %f2) { +; CHECK-LABEL: f1: +; CHECK: sdbr %f0, %f2 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.fsub.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check the low end of the SDB range. +define double @f2(double %f1, double *%ptr) { +; CHECK-LABEL: f2: +; CHECK: sdb %f0, 0(%r2) +; CHECK: br %r14 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fsub.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check the high end of the aligned SDB range. +define double @f3(double %f1, double *%base) { +; CHECK-LABEL: f3: +; CHECK: sdb %f0, 4088(%r2) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 511 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fsub.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define double @f4(double %f1, double *%base) { +; CHECK-LABEL: f4: +; CHECK: aghi %r2, 4096 +; CHECK: sdb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 512 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fsub.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check negative displacements, which also need separate address logic. +define double @f5(double %f1, double *%base) { +; CHECK-LABEL: f5: +; CHECK: aghi %r2, -8 +; CHECK: sdb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 -1 + %f2 = load double, double *%ptr + %res = call double @llvm.experimental.constrained.fsub.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check that SDB allows indices. +define double @f6(double %f1, double *%base, i64 %index) { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r3, 3 +; CHECK: sdb %f0, 800(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr double, double *%base, i64 %index + %ptr2 = getelementptr double, double *%ptr1, i64 100 + %f2 = load double, double *%ptr2 + %res = call double @llvm.experimental.constrained.fsub.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +; Check that subtractions of spilled values can use SDB rather than SDBR. 
+define double @f7(double *%ptr0) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK-SCALAR: sdb %f0, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr double, double *%ptr0, i64 2 + %ptr2 = getelementptr double, double *%ptr0, i64 4 + %ptr3 = getelementptr double, double *%ptr0, i64 6 + %ptr4 = getelementptr double, double *%ptr0, i64 8 + %ptr5 = getelementptr double, double *%ptr0, i64 10 + %ptr6 = getelementptr double, double *%ptr0, i64 12 + %ptr7 = getelementptr double, double *%ptr0, i64 14 + %ptr8 = getelementptr double, double *%ptr0, i64 16 + %ptr9 = getelementptr double, double *%ptr0, i64 18 + %ptr10 = getelementptr double, double *%ptr0, i64 20 + + %val0 = load double, double *%ptr0 + %val1 = load double, double *%ptr1 + %val2 = load double, double *%ptr2 + %val3 = load double, double *%ptr3 + %val4 = load double, double *%ptr4 + %val5 = load double, double *%ptr5 + %val6 = load double, double *%ptr6 + %val7 = load double, double *%ptr7 + %val8 = load double, double *%ptr8 + %val9 = load double, double *%ptr9 + %val10 = load double, double *%ptr10 + + %ret = call double @foo() + + %sub0 = call double @llvm.experimental.constrained.fsub.f64( + double %ret, double %val0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub1 = call double @llvm.experimental.constrained.fsub.f64( + double %sub0, double %val1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub2 = call double @llvm.experimental.constrained.fsub.f64( + double %sub1, double %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub3 = call double @llvm.experimental.constrained.fsub.f64( + double %sub2, double %val3, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub4 = call double @llvm.experimental.constrained.fsub.f64( + double %sub3, double %val4, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub5 = call double @llvm.experimental.constrained.fsub.f64( + double %sub4, double %val5, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub6 = call double @llvm.experimental.constrained.fsub.f64( + double %sub5, double %val6, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub7 = call double @llvm.experimental.constrained.fsub.f64( + double %sub6, double %val7, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub8 = call double @llvm.experimental.constrained.fsub.f64( + double %sub7, double %val8, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub9 = call double @llvm.experimental.constrained.fsub.f64( + double %sub8, double %val9, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sub10 = call double @llvm.experimental.constrained.fsub.f64( + double %sub9, double %val10, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + ret double %sub10 +} Index: test/CodeGen/SystemZ/fp-strict-sub-03.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-sub-03.ll +++ test/CodeGen/SystemZ/fp-strict-sub-03.ll @@ -0,0 +1,25 @@ +; Test strict 128-bit floating-point subtraction. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare fp128 @llvm.experimental.constrained.fsub.f128(fp128, fp128, metadata, metadata) + +; There is no memory form of 128-bit subtraction. 
+define void @f1(fp128 *%ptr, float %f2) { +; CHECK-LABEL: f1: +; CHECK-DAG: lxebr %f0, %f0 +; CHECK-DAG: ld %f1, 0(%r2) +; CHECK-DAG: ld %f3, 8(%r2) +; CHECK: sxbr %f1, %f0 +; CHECK: std %f1, 0(%r2) +; CHECK: std %f3, 8(%r2) +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr + %f2x = fpext float %f2 to fp128 + %sum = call fp128 @llvm.experimental.constrained.fsub.f128( + fp128 %f1, fp128 %f2x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %sum, fp128 *%ptr + ret void +} Index: test/CodeGen/SystemZ/fp-strict-sub-04.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-sub-04.ll +++ test/CodeGen/SystemZ/fp-strict-sub-04.ll @@ -0,0 +1,22 @@ +; Test strict 128-bit floating-point subtraction on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare fp128 @llvm.experimental.constrained.fsub.f128(fp128, fp128, metadata, metadata) + +define void @f1(fp128 *%ptr1, fp128 *%ptr2) { +; CHECK-LABEL: f1: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK: wfsxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr1 + %f2 = load fp128, fp128 *%ptr2 + %sum = call fp128 @llvm.experimental.constrained.fsub.f128( + fp128 %f1, fp128 %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %sum, fp128 *%ptr1 + ret void +} Index: test/CodeGen/SystemZ/vec-strict-add-01.ll =================================================================== --- test/CodeGen/SystemZ/vec-strict-add-01.ll +++ test/CodeGen/SystemZ/vec-strict-add-01.ll @@ -0,0 +1,33 @@ +; Test strict vector addition. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) + +; Test a v2f64 addition. +define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1, + <2 x double> %val2) { +; CHECK-LABEL: f5: +; CHECK: vfadb %v24, %v26, %v28 +; CHECK: br %r14 + %ret = call <2 x double> @llvm.experimental.constrained.fadd.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %ret +} + +; Test an f64 addition that uses vector registers. +define double @f6(<2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f6: +; CHECK: wfadb %f0, %v24, %v26 +; CHECK: br %r14 + %scalar1 = extractelement <2 x double> %val1, i32 0 + %scalar2 = extractelement <2 x double> %val2, i32 0 + %ret = call double @llvm.experimental.constrained.fadd.f64( + double %scalar1, double %scalar2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %ret +} Index: test/CodeGen/SystemZ/vec-strict-add-02.ll =================================================================== --- test/CodeGen/SystemZ/vec-strict-add-02.ll +++ test/CodeGen/SystemZ/vec-strict-add-02.ll @@ -0,0 +1,33 @@ +; Test strict vector addition on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata) + +; Test a v4f32 addition. 
+define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f1: +; CHECK: vfasb %v24, %v26, %v28 +; CHECK: br %r14 + %ret = call <4 x float> @llvm.experimental.constrained.fadd.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %ret +} + +; Test an f32 addition that uses vector registers. +define float @f2(<4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f2: +; CHECK: wfasb %f0, %v24, %v26 +; CHECK: br %r14 + %scalar1 = extractelement <4 x float> %val1, i32 0 + %scalar2 = extractelement <4 x float> %val2, i32 0 + %ret = call float @llvm.experimental.constrained.fadd.f32( + float %scalar1, float %scalar2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %ret +} Index: test/CodeGen/SystemZ/vec-strict-div-01.ll =================================================================== --- test/CodeGen/SystemZ/vec-strict-div-01.ll +++ test/CodeGen/SystemZ/vec-strict-div-01.ll @@ -0,0 +1,33 @@ +; Test strict vector division. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata) + +; Test a v2f64 division. +define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1, + <2 x double> %val2) { +; CHECK-LABEL: f5: +; CHECK: vfddb %v24, %v26, %v28 +; CHECK: br %r14 + %ret = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %ret +} + +; Test an f64 division that uses vector registers. +define double @f6(<2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f6: +; CHECK: wfddb %f0, %v24, %v26 +; CHECK: br %r14 + %scalar1 = extractelement <2 x double> %val1, i32 0 + %scalar2 = extractelement <2 x double> %val2, i32 0 + %ret = call double @llvm.experimental.constrained.fdiv.f64( + double %scalar1, double %scalar2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %ret +} Index: test/CodeGen/SystemZ/vec-strict-div-02.ll =================================================================== --- test/CodeGen/SystemZ/vec-strict-div-02.ll +++ test/CodeGen/SystemZ/vec-strict-div-02.ll @@ -0,0 +1,33 @@ +; Test strict vector division on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata) + +; Test a v4f32 division. +define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f1: +; CHECK: vfdsb %v24, %v26, %v28 +; CHECK: br %r14 + %ret = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %ret +} + +; Test an f32 division that uses vector registers. 
+define float @f2(<4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f2: +; CHECK: wfdsb %f0, %v24, %v26 +; CHECK: br %r14 + %scalar1 = extractelement <4 x float> %val1, i32 0 + %scalar2 = extractelement <4 x float> %val2, i32 0 + %ret = call float @llvm.experimental.constrained.fdiv.f32( + float %scalar1, float %scalar2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %ret +} Index: test/CodeGen/SystemZ/vec-strict-max-01.ll =================================================================== --- test/CodeGen/SystemZ/vec-strict-max-01.ll +++ test/CodeGen/SystemZ/vec-strict-max-01.ll @@ -0,0 +1,80 @@ +; Test strict vector maximum on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare double @llvm.experimental.constrained.maxnum.f64(double, double, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata) + +declare float @llvm.experimental.constrained.maxnum.f32(float, float, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.maxnum.v4f32(<4 x float>, <4 x float>, metadata, metadata) + +declare fp128 @llvm.experimental.constrained.maxnum.f128(fp128, fp128, metadata, metadata) + +; Test the f64 maxnum intrinsic. +define double @f1(double %dummy, double %val1, double %val2) { +; CHECK-LABEL: f1: +; CHECK: wfmaxdb %f0, %f2, %f4, 4 +; CHECK: br %r14 + %ret = call double @llvm.experimental.constrained.maxnum.f64( + double %val1, double %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %ret +} + +; Test the v2f64 maxnum intrinsic. +define <2 x double> @f2(<2 x double> %dummy, <2 x double> %val1, + <2 x double> %val2) { +; CHECK-LABEL: f2: +; CHECK: vfmaxdb %v24, %v26, %v28, 4 +; CHECK: br %r14 + %ret = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %ret +} + +; Test the f32 maxnum intrinsic. +define float @f3(float %dummy, float %val1, float %val2) { +; CHECK-LABEL: f3: +; CHECK: wfmaxsb %f0, %f2, %f4, 4 +; CHECK: br %r14 + %ret = call float @llvm.experimental.constrained.maxnum.f32( + float %val1, float %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %ret +} + +; Test the v4f32 maxnum intrinsic. +define <4 x float> @f4(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f4: +; CHECK: vfmaxsb %v24, %v26, %v28, 4 +; CHECK: br %r14 + %ret = call <4 x float> @llvm.experimental.constrained.maxnum.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %ret +} + +; Test the f128 maxnum intrinsic. 
+define void @f5(fp128 *%ptr1, fp128 *%ptr2, fp128 *%dst) { +; CHECK-LABEL: f5: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK: wfmaxxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4 +; CHECK: vst [[RES]], 0(%r4) +; CHECK: br %r14 + %val1 = load fp128, fp128* %ptr1 + %val2 = load fp128, fp128* %ptr2 + %res = call fp128 @llvm.experimental.constrained.maxnum.f128( + fp128 %val1, fp128 %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128* %dst + ret void +} + Index: test/CodeGen/SystemZ/vec-strict-min-01.ll =================================================================== --- test/CodeGen/SystemZ/vec-strict-min-01.ll +++ test/CodeGen/SystemZ/vec-strict-min-01.ll @@ -0,0 +1,80 @@ +; Test strict vector minimum on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare double @llvm.experimental.constrained.minnum.f64(double, double, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata) + +declare float @llvm.experimental.constrained.minnum.f32(float, float, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.minnum.v4f32(<4 x float>, <4 x float>, metadata, metadata) + +declare fp128 @llvm.experimental.constrained.minnum.f128(fp128, fp128, metadata, metadata) + +; Test the f64 minnum intrinsic. +define double @f1(double %dummy, double %val1, double %val2) { +; CHECK-LABEL: f1: +; CHECK: wfmindb %f0, %f2, %f4, 4 +; CHECK: br %r14 + %ret = call double @llvm.experimental.constrained.minnum.f64( + double %val1, double %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %ret +} + +; Test the v2f64 minnum intrinsic. +define <2 x double> @f2(<2 x double> %dummy, <2 x double> %val1, + <2 x double> %val2) { +; CHECK-LABEL: f2: +; CHECK: vfmindb %v24, %v26, %v28, 4 +; CHECK: br %r14 + %ret = call <2 x double> @llvm.experimental.constrained.minnum.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %ret +} + +; Test the f32 minnum intrinsic. +define float @f3(float %dummy, float %val1, float %val2) { +; CHECK-LABEL: f3: +; CHECK: wfminsb %f0, %f2, %f4, 4 +; CHECK: br %r14 + %ret = call float @llvm.experimental.constrained.minnum.f32( + float %val1, float %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %ret +} + +; Test the v4f32 minnum intrinsic. +define <4 x float> @f4(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f4: +; CHECK: vfminsb %v24, %v26, %v28, 4 +; CHECK: br %r14 + %ret = call <4 x float> @llvm.experimental.constrained.minnum.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %ret +} + +; Test the f128 minnum intrinsic. 
+define void @f5(fp128 *%ptr1, fp128 *%ptr2, fp128 *%dst) { +; CHECK-LABEL: f5: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK: wfminxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4 +; CHECK: vst [[RES]], 0(%r4) +; CHECK: br %r14 + %val1 = load fp128, fp128* %ptr1 + %val2 = load fp128, fp128* %ptr2 + %res = call fp128 @llvm.experimental.constrained.minnum.f128( + fp128 %val1, fp128 %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store fp128 %res, fp128* %dst + ret void +} + Index: test/CodeGen/SystemZ/vec-strict-mul-01.ll =================================================================== --- test/CodeGen/SystemZ/vec-strict-mul-01.ll +++ test/CodeGen/SystemZ/vec-strict-mul-01.ll @@ -0,0 +1,33 @@ +; Test strict vector multiplication. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata) + +; Test a v2f64 multiplication. +define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1, + <2 x double> %val2) { +; CHECK-LABEL: f5: +; CHECK: vfmdb %v24, %v26, %v28 +; CHECK: br %r14 + %ret = call <2 x double> @llvm.experimental.constrained.fmul.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %ret +} + +; Test an f64 multiplication that uses vector registers. +define double @f6(<2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f6: +; CHECK: wfmdb %f0, %v24, %v26 +; CHECK: br %r14 + %scalar1 = extractelement <2 x double> %val1, i32 0 + %scalar2 = extractelement <2 x double> %val2, i32 0 + %ret = call double @llvm.experimental.constrained.fmul.f64( + double %scalar1, double %scalar2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %ret +} Index: test/CodeGen/SystemZ/vec-strict-mul-02.ll =================================================================== --- test/CodeGen/SystemZ/vec-strict-mul-02.ll +++ test/CodeGen/SystemZ/vec-strict-mul-02.ll @@ -0,0 +1,36 @@ +; Test strict vector multiply-and-add. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata) + +; Test a v2f64 multiply-and-add. +define <2 x double> @f4(<2 x double> %dummy, <2 x double> %val1, + <2 x double> %val2, <2 x double> %val3) { +; CHECK-LABEL: f4: +; CHECK: vfmadb %v24, %v26, %v28, %v30 +; CHECK: br %r14 + %ret = call <2 x double> @llvm.experimental.constrained.fma.v2f64 ( + <2 x double> %val1, + <2 x double> %val2, + <2 x double> %val3, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %ret +} + +; Test a v2f64 multiply-and-subtract. 
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
+                        <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f5:
+; CHECK: vfmsdb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+  %negval3 = fsub <2 x double> <double -0.0, double -0.0>, %val3
+  %ret = call <2 x double> @llvm.experimental.constrained.fma.v2f64 (
+                        <2 x double> %val1,
+                        <2 x double> %val2,
+                        <2 x double> %negval3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %ret
+}
Index: test/CodeGen/SystemZ/vec-strict-mul-03.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-mul-03.ll
+++ test/CodeGen/SystemZ/vec-strict-mul-03.ll
@@ -0,0 +1,33 @@
+; Test strict vector multiplication on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+
+; Test a v4f32 multiplication.
+define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1,
+                       <4 x float> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vfmsb %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(
+                        <4 x float> %val1, <4 x float> %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %ret
+}
+
+; Test an f32 multiplication that uses vector registers.
+define float @f2(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: wfmsb %f0, %v24, %v26
+; CHECK: br %r14
+  %scalar1 = extractelement <4 x float> %val1, i32 0
+  %scalar2 = extractelement <4 x float> %val2, i32 0
+  %ret = call float @llvm.experimental.constrained.fmul.f32(
+                        float %scalar1, float %scalar2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %ret
+}
Index: test/CodeGen/SystemZ/vec-strict-mul-04.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-mul-04.ll
+++ test/CodeGen/SystemZ/vec-strict-mul-04.ll
@@ -0,0 +1,37 @@
+; Test strict vector multiply-and-add on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
+
+; Test a v4f32 multiply-and-add.
+define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1,
+                       <4 x float> %val2, <4 x float> %val3) {
+; CHECK-LABEL: f1:
+; CHECK: vfmasb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+  %ret = call <4 x float> @llvm.experimental.constrained.fma.v4f32 (
+                        <4 x float> %val1,
+                        <4 x float> %val2,
+                        <4 x float> %val3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %ret
+}
+
+; Test a v4f32 multiply-and-subtract.
+define <4 x float> @f2(<4 x float> %dummy, <4 x float> %val1,
+                       <4 x float> %val2, <4 x float> %val3) {
+; CHECK-LABEL: f2:
+; CHECK: vfmssb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+  %negval3 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %val3
+  %ret = call <4 x float> @llvm.experimental.constrained.fma.v4f32 (
+                        <4 x float> %val1,
+                        <4 x float> %val2,
+                        <4 x float> %negval3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %ret
+}
Index: test/CodeGen/SystemZ/vec-strict-mul-05.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-mul-05.ll
+++ test/CodeGen/SystemZ/vec-strict-mul-05.ll
@@ -0,0 +1,75 @@
+; Test vector negative multiply-and-add on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
+
+; Test a v2f64 negative multiply-and-add.
+define <2 x double> @f1(<2 x double> %dummy, <2 x double> %val1,
+                        <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f1:
+; CHECK: vfnmadb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+  %ret = call <2 x double> @llvm.experimental.constrained.fma.v2f64 (
+                        <2 x double> %val1,
+                        <2 x double> %val2,
+                        <2 x double> %val3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %negret = fsub <2 x double> <double -0.0, double -0.0>, %ret
+  ret <2 x double> %negret
+}
+
+; Test a v2f64 negative multiply-and-subtract.
+define <2 x double> @f2(<2 x double> %dummy, <2 x double> %val1,
+                        <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f2:
+; CHECK: vfnmsdb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+  %negval3 = fsub <2 x double> <double -0.0, double -0.0>, %val3
+  %ret = call <2 x double> @llvm.experimental.constrained.fma.v2f64 (
+                        <2 x double> %val1,
+                        <2 x double> %val2,
+                        <2 x double> %negval3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %negret = fsub <2 x double> <double -0.0, double -0.0>, %ret
+  ret <2 x double> %negret
+}
+
+; Test a v4f32 negative multiply-and-add.
+define <4 x float> @f3(<4 x float> %dummy, <4 x float> %val1,
+                       <4 x float> %val2, <4 x float> %val3) {
+; CHECK-LABEL: f3:
+; CHECK: vfnmasb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+  %ret = call <4 x float> @llvm.experimental.constrained.fma.v4f32 (
+                        <4 x float> %val1,
+                        <4 x float> %val2,
+                        <4 x float> %val3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %negret = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %ret
+  ret <4 x float> %negret
+}
+
+; Test a v4f32 negative multiply-and-subtract.
+define <4 x float> @f4(<4 x float> %dummy, <4 x float> %val1,
+                       <4 x float> %val2, <4 x float> %val3) {
+; CHECK-LABEL: f4:
+; CHECK: vfnmssb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+  %negval3 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %val3
+  %ret = call <4 x float> @llvm.experimental.constrained.fma.v4f32 (
+                        <4 x float> %val1,
+                        <4 x float> %val2,
+                        <4 x float> %negval3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %negret = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %ret
+  ret <4 x float> %negret
+}
Index: test/CodeGen/SystemZ/vec-strict-round-01.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-round-01.ll
+++ test/CodeGen/SystemZ/vec-strict-round-01.ll
@@ -0,0 +1,155 @@
+; Test strict v2f64 rounding.
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.floor.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.trunc.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.round.f64(double, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata) + +define <2 x double> @f1(<2 x double> %val) { +; CHECK-LABEL: f1: +; CHECK: vfidb %v24, %v24, 0, 0 +; CHECK: br %r14 + %res = call <2 x double> @llvm.experimental.constrained.rint.v2f64( + <2 x double> %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %res +} + +define <2 x double> @f2(<2 x double> %val) { +; CHECK-LABEL: f2: +; CHECK: vfidb %v24, %v24, 4, 0 +; CHECK: br %r14 + %res = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( + <2 x double> %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %res +} + +define <2 x double> @f3(<2 x double> %val) { +; CHECK-LABEL: f3: +; CHECK: vfidb %v24, %v24, 4, 7 +; CHECK: br %r14 + %res = call <2 x double> @llvm.experimental.constrained.floor.v2f64( + <2 x double> %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %res +} + +define <2 x double> @f4(<2 x double> %val) { +; CHECK-LABEL: f4: +; CHECK: vfidb %v24, %v24, 4, 6 +; CHECK: br %r14 + %res = call <2 x double> @llvm.experimental.constrained.ceil.v2f64( + <2 x double> %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %res +} + +define <2 x double> @f5(<2 x double> %val) { +; CHECK-LABEL: f5: +; CHECK: vfidb %v24, %v24, 4, 5 +; CHECK: br %r14 + %res = call <2 x double> @llvm.experimental.constrained.trunc.v2f64( + <2 x double> %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %res +} + +define <2 x double> @f6(<2 x double> %val) { +; CHECK-LABEL: f6: +; CHECK: vfidb %v24, %v24, 4, 1 +; CHECK: br %r14 + %res = call <2 x double> @llvm.experimental.constrained.round.v2f64( + <2 x double> %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %res +} + +define double @f7(<2 x double> %val) { +; CHECK-LABEL: f7: +; CHECK: wfidb %f0, %v24, 0, 0 +; CHECK: br %r14 + %scalar = extractelement <2 x double> %val, i32 0 + %res = call double @llvm.experimental.constrained.rint.f64( + double %scalar, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +define double @f8(<2 x double> %val) { +; CHECK-LABEL: f8: +; CHECK: wfidb %f0, %v24, 4, 0 +; CHECK: br %r14 + %scalar = extractelement <2 x double> %val, i32 0 + %res = call double @llvm.experimental.constrained.nearbyint.f64( + double %scalar, + metadata 
!"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +define double @f9(<2 x double> %val) { +; CHECK-LABEL: f9: +; CHECK: wfidb %f0, %v24, 4, 7 +; CHECK: br %r14 + %scalar = extractelement <2 x double> %val, i32 0 + %res = call double @llvm.experimental.constrained.floor.f64( + double %scalar, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + + +define double @f10(<2 x double> %val) { +; CHECK-LABEL: f10: +; CHECK: wfidb %f0, %v24, 4, 6 +; CHECK: br %r14 + %scalar = extractelement <2 x double> %val, i32 0 + %res = call double @llvm.experimental.constrained.ceil.f64( + double %scalar, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +define double @f11(<2 x double> %val) { +; CHECK-LABEL: f11: +; CHECK: wfidb %f0, %v24, 4, 5 +; CHECK: br %r14 + %scalar = extractelement <2 x double> %val, i32 0 + %res = call double @llvm.experimental.constrained.trunc.f64( + double %scalar, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +define double @f12(<2 x double> %val) { +; CHECK-LABEL: f12: +; CHECK: wfidb %f0, %v24, 4, 1 +; CHECK: br %r14 + %scalar = extractelement <2 x double> %val, i32 0 + %res = call double @llvm.experimental.constrained.round.f64( + double %scalar, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} Index: test/CodeGen/SystemZ/vec-strict-round-02.ll =================================================================== --- test/CodeGen/SystemZ/vec-strict-round-02.ll +++ test/CodeGen/SystemZ/vec-strict-round-02.ll @@ -0,0 +1,154 @@ +; Test strict v4f32 rounding on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.floor.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.ceil.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.trunc.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.round.f32(float, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x float>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float>, metadata, metadata) + +define <4 x float> @f1(<4 x float> %val) { +; CHECK-LABEL: f1: +; CHECK: vfisb %v24, %v24, 0, 0 +; CHECK: br %r14 + %res = call <4 x float> @llvm.experimental.constrained.rint.v4f32( + <4 x float> %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %res +} + +define <4 x float> @f2(<4 x float> %val) { +; CHECK-LABEL: f2: +; CHECK: vfisb %v24, %v24, 4, 0 +; CHECK: br %r14 + %res = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32( + <4 x float> %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %res +} + +define <4 x float> @f3(<4 x float> %val) { +; CHECK-LABEL: f3: +; CHECK: vfisb %v24, %v24, 4, 7 +; CHECK: br %r14 + %res = call <4 x float> 
@llvm.experimental.constrained.floor.v4f32( + <4 x float> %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %res +} + +define <4 x float> @f4(<4 x float> %val) { +; CHECK-LABEL: f4: +; CHECK: vfisb %v24, %v24, 4, 6 +; CHECK: br %r14 + %res = call <4 x float> @llvm.experimental.constrained.ceil.v4f32( + <4 x float> %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %res +} + +define <4 x float> @f5(<4 x float> %val) { +; CHECK-LABEL: f5: +; CHECK: vfisb %v24, %v24, 4, 5 +; CHECK: br %r14 + %res = call <4 x float> @llvm.experimental.constrained.trunc.v4f32( + <4 x float> %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %res +} + +define <4 x float> @f6(<4 x float> %val) { +; CHECK-LABEL: f6: +; CHECK: vfisb %v24, %v24, 4, 1 +; CHECK: br %r14 + %res = call <4 x float> @llvm.experimental.constrained.round.v4f32( + <4 x float> %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %res +} + +define float @f7(<4 x float> %val) { +; CHECK-LABEL: f7: +; CHECK: wfisb %f0, %v24, 0, 0 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %res = call float @llvm.experimental.constrained.rint.f32( + float %scalar, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define float @f8(<4 x float> %val) { +; CHECK-LABEL: f8: +; CHECK: wfisb %f0, %v24, 4, 0 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %res = call float @llvm.experimental.constrained.nearbyint.f32( + float %scalar, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define float @f9(<4 x float> %val) { +; CHECK-LABEL: f9: +; CHECK: wfisb %f0, %v24, 4, 7 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %res = call float @llvm.experimental.constrained.floor.f32( + float %scalar, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define float @f10(<4 x float> %val) { +; CHECK-LABEL: f10: +; CHECK: wfisb %f0, %v24, 4, 6 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %res = call float @llvm.experimental.constrained.ceil.f32( + float %scalar, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define float @f11(<4 x float> %val) { +; CHECK-LABEL: f11: +; CHECK: wfisb %f0, %v24, 4, 5 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %res = call float @llvm.experimental.constrained.trunc.f32( + float %scalar, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define float @f12(<4 x float> %val) { +; CHECK-LABEL: f12: +; CHECK: wfisb %f0, %v24, 4, 1 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %res = call float @llvm.experimental.constrained.round.f32( + float %scalar, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} Index: test/CodeGen/SystemZ/vec-strict-sqrt-01.ll =================================================================== --- test/CodeGen/SystemZ/vec-strict-sqrt-01.ll +++ test/CodeGen/SystemZ/vec-strict-sqrt-01.ll @@ -0,0 +1,29 @@ +; Test f64 and v2f64 square root. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata) + +define <2 x double> @f1(<2 x double> %val) { +; CHECK-LABEL: f1: +; CHECK: vfsqdb %v24, %v24 +; CHECK: br %r14 + %ret = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64( + <2 x double> %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %ret +} + +define double @f2(<2 x double> %val) { +; CHECK-LABEL: f2: +; CHECK: wfsqdb %f0, %v24 +; CHECK: br %r14 + %scalar = extractelement <2 x double> %val, i32 0 + %ret = call double @llvm.experimental.constrained.sqrt.f64( + double %scalar, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %ret +} Index: test/CodeGen/SystemZ/vec-strict-sqrt-02.ll =================================================================== --- test/CodeGen/SystemZ/vec-strict-sqrt-02.ll +++ test/CodeGen/SystemZ/vec-strict-sqrt-02.ll @@ -0,0 +1,29 @@ +; Test strict f32 and v4f32 square root on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata) + +define <4 x float> @f1(<4 x float> %val) { +; CHECK-LABEL: f1: +; CHECK: vfsqsb %v24, %v24 +; CHECK: br %r14 + %ret = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32( + <4 x float> %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %ret +} + +define float @f2(<4 x float> %val) { +; CHECK-LABEL: f2: +; CHECK: wfsqsb %f0, %v24 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %ret = call float @llvm.experimental.constrained.sqrt.f32( + float %scalar, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %ret +} Index: test/CodeGen/SystemZ/vec-strict-sub-01.ll =================================================================== --- test/CodeGen/SystemZ/vec-strict-sub-01.ll +++ test/CodeGen/SystemZ/vec-strict-sub-01.ll @@ -0,0 +1,34 @@ +; Test strict vector subtraction. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) + +; Test a v2f64 subtraction. +define <2 x double> @f6(<2 x double> %dummy, <2 x double> %val1, + <2 x double> %val2) { +; CHECK-LABEL: f6: +; CHECK: vfsdb %v24, %v26, %v28 +; CHECK: br %r14 + %ret = call <2 x double> @llvm.experimental.constrained.fsub.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %ret +} + +; Test an f64 subtraction that uses vector registers. 
+define double @f7(<2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f7: +; CHECK: wfsdb %f0, %v24, %v26 +; CHECK: br %r14 + %scalar1 = extractelement <2 x double> %val1, i32 0 + %scalar2 = extractelement <2 x double> %val2, i32 0 + %ret = call double @llvm.experimental.constrained.fsub.f64( + double %scalar1, double %scalar2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %ret +} + Index: test/CodeGen/SystemZ/vec-strict-sub-02.ll =================================================================== --- test/CodeGen/SystemZ/vec-strict-sub-02.ll +++ test/CodeGen/SystemZ/vec-strict-sub-02.ll @@ -0,0 +1,33 @@ +; Test strict vector subtraction on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float>, <4 x float>, metadata, metadata) + +; Test a v4f32 subtraction. +define <4 x float> @f6(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f6: +; CHECK: vfssb %v24, %v26, %v28 +; CHECK: br %r14 + %ret = call <4 x float> @llvm.experimental.constrained.fsub.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %ret +} + +; Test an f32 subtraction that uses vector registers. +define float @f7(<4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f7: +; CHECK: wfssb %f0, %v24, %v26 +; CHECK: br %r14 + %scalar1 = extractelement <4 x float> %val1, i32 0 + %scalar2 = extractelement <4 x float> %val2, i32 0 + %ret = call float @llvm.experimental.constrained.fsub.f32( + float %scalar1, float %scalar2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %ret +} Index: utils/TableGen/CodeGenInstruction.h =================================================================== --- utils/TableGen/CodeGenInstruction.h +++ utils/TableGen/CodeGenInstruction.h @@ -249,6 +249,7 @@ bool mayLoad_Unset : 1; bool mayStore : 1; bool mayStore_Unset : 1; + bool mayRaiseFPException : 1; bool isPredicable : 1; bool isConvertibleToThreeAddress : 1; bool isCommutable : 1; Index: utils/TableGen/CodeGenInstruction.cpp =================================================================== --- utils/TableGen/CodeGenInstruction.cpp +++ utils/TableGen/CodeGenInstruction.cpp @@ -401,6 +401,7 @@ mayLoad_Unset = Unset; mayStore = R->getValueAsBitOrUnset("mayStore", Unset); mayStore_Unset = Unset; + mayRaiseFPException = R->getValueAsBit("mayRaiseFPException"); hasSideEffects = R->getValueAsBitOrUnset("hasSideEffects", Unset); hasSideEffects_Unset = Unset; Index: utils/TableGen/InstrInfoEmitter.cpp =================================================================== --- utils/TableGen/InstrInfoEmitter.cpp +++ utils/TableGen/InstrInfoEmitter.cpp @@ -603,6 +603,7 @@ if (Inst.canFoldAsLoad) OS << "|(1ULL<