Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1084,7 +1084,9 @@
   // node should be mutated.
   //
   // FIXME: The backends need a way to handle FP constraints.
-  if (Node->isStrictFPOpcode())
+  if (Node->isStrictFPOpcode() &&
+      (TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0))
+       != TargetLowering::Legal))
     Node = CurDAG->mutateStrictFPToFP(Node);
 
   DEBUG(dbgs() << "\nISEL: Starting selection on root node: ";
Index: lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- lib/CodeGen/TargetLoweringBase.cpp
+++ lib/CodeGen/TargetLoweringBase.cpp
@@ -638,6 +638,26 @@
       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
     }
 
+    // Constrained floating-point operations default to expand.
+    setOperationAction(ISD::STRICT_FADD, VT, Expand);
+    setOperationAction(ISD::STRICT_FSUB, VT, Expand);
+    setOperationAction(ISD::STRICT_FMUL, VT, Expand);
+    setOperationAction(ISD::STRICT_FDIV, VT, Expand);
+    setOperationAction(ISD::STRICT_FREM, VT, Expand);
+    setOperationAction(ISD::STRICT_FMA, VT, Expand);
+    setOperationAction(ISD::STRICT_FSQRT, VT, Expand);
+    setOperationAction(ISD::STRICT_FPOW, VT, Expand);
+    setOperationAction(ISD::STRICT_FPOWI, VT, Expand);
+    setOperationAction(ISD::STRICT_FSIN, VT, Expand);
+    setOperationAction(ISD::STRICT_FCOS, VT, Expand);
+    setOperationAction(ISD::STRICT_FEXP, VT, Expand);
+    setOperationAction(ISD::STRICT_FEXP2, VT, Expand);
+    setOperationAction(ISD::STRICT_FLOG, VT, Expand);
+    setOperationAction(ISD::STRICT_FLOG10, VT, Expand);
+    setOperationAction(ISD::STRICT_FLOG2, VT, Expand);
+    setOperationAction(ISD::STRICT_FRINT, VT, Expand);
+    setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand);
+
     // For most targets @llvm.get.dynamic.area.offset just returns 0.
     setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
   }
Index: lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "SystemZTargetMachine.h"
+#include "SystemZMachineFunctionInfo.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Support/Debug.h"
@@ -1541,6 +1542,25 @@
     }
     break;
   }
+
+  case ISD::STRICT_FADD:
+  case ISD::STRICT_FSUB:
+  case ISD::STRICT_FMUL:
+  case ISD::STRICT_FDIV:
+  case ISD::STRICT_FMA:
+  case ISD::STRICT_FSQRT:
+  case ISD::STRICT_FRINT:
+  case ISD::STRICT_FNEARBYINT:
+    Node = CurDAG->mutateStrictFPToFP(Node);
+    SelectCode(Node);
+
+    SystemZMachineFunctionInfo *MFI = MF->getInfo<SystemZMachineFunctionInfo>();
+    MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+    MemOp[0] = MF->getMachineMemOperand(
+        MachinePointerInfo(MFI->getFPStatusPSV()),
+        MachineMemOperand::MOLoad | MachineMemOperand::MOStore, 0, 0);
+    cast<MachineSDNode>(Node)->setMemRefs(MemOp, MemOp + 1);
+    return;
   }
 
   SelectCode(Node);
Index: lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.cpp
+++ lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -401,6 +401,18 @@
       setOperationAction(ISD::FSINCOS, VT, Expand);
       setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
+
+      // Handle constrained floating-point operations.
+      setOperationAction(ISD::STRICT_FADD, VT, Legal);
+      setOperationAction(ISD::STRICT_FSUB, VT, Legal);
+      setOperationAction(ISD::STRICT_FMUL, VT, Legal);
+      setOperationAction(ISD::STRICT_FDIV, VT, Legal);
+      setOperationAction(ISD::STRICT_FMA, VT, Legal);
+      setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
+      setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+      if (Subtarget.hasFPExtension()) {
+        setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
+      }
     }
   }
 
Index: lib/Target/SystemZ/SystemZMachineFunctionInfo.h
===================================================================
--- lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -10,10 +10,29 @@
 #ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINEFUNCTIONINFO_H
 #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINEFUNCTIONINFO_H
 
+#include "SystemZSubtarget.h"
 #include "llvm/CodeGen/MachineFunction.h"
 
 namespace llvm {
 
+class SystemZFPStatusPseudoSourceValue : public PseudoSourceValue {
+public:
+  explicit SystemZFPStatusPseudoSourceValue(const TargetInstrInfo &TII) :
+    PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
+
+  bool isConstant(const MachineFrameInfo *) const override {
+    return false;
+  }
+
+  bool isAliased(const MachineFrameInfo *) const override {
+    return false;
+  }
+
+  bool mayAlias(const MachineFrameInfo *) const override {
+    return false;
+  }
+};
+
 class SystemZMachineFunctionInfo : public MachineFunctionInfo {
   virtual void anchor();
   unsigned LowSavedGPR;
@@ -25,12 +44,14 @@
   int FramePointerSaveIndex;
   bool ManipulatesSP;
   unsigned NumLocalDynamics;
+  SystemZFPStatusPseudoSourceValue FPStatusPSV;
 
 public:
   explicit SystemZMachineFunctionInfo(MachineFunction &MF)
     : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0),
       VarArgsFrameIndex(0), RegSaveFrameIndex(0), FramePointerSaveIndex(0),
-      ManipulatesSP(false), NumLocalDynamics(0) {}
+      ManipulatesSP(false), NumLocalDynamics(0),
+      FPStatusPSV(*MF.getSubtarget().getInstrInfo()) {}
 
   // Get and set the first call-saved GPR that should be saved and restored
   // by this function. This is 0 if no GPRs need to be saved or restored.
@@ -72,6 +93,11 @@
   // Count number of local-dynamic TLS symbols used.
   unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
   void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
+
+  // Return the pseudo source value representing FP exception status.
+  const SystemZFPStatusPseudoSourceValue *getFPStatusPSV(void) {
+    return &FPStatusPSV;
+  }
 };
 
 } // end namespace llvm
Index: test/CodeGen/SystemZ/fp-strict-add-01.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-add-01.ll
+++ test/CodeGen/SystemZ/fp-strict-add-01.ll
@@ -0,0 +1,173 @@
+; Test 32-bit floating-point strict addition.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @foo()
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+
+; Check register addition.
+define float @f1(float %f1, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK: aebr %f0, %f2
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the low end of the AEB range.
+define float @f2(float %f1, float *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: aeb %f0, 0(%r2)
+; CHECK: br %r14
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the high end of the aligned AEB range.
+define float @f3(float %f1, float *%base) {
+; CHECK-LABEL: f3:
+; CHECK: aeb %f0, 4092(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1023
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f4(float %f1, float *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: aeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1024
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define float @f5(float %f1, float *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -4
+; CHECK: aeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 -1
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check that AEB allows indices.
+define float @f6(float %f1, float *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: aeb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr float, float *%base, i64 %index
+  %ptr2 = getelementptr float, float *%ptr1, i64 100
+  %f2 = load float, float *%ptr2
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check that additions of spilled values can use AEB rather than AEBR.
+define float @f7(float *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: aeb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr float, float *%ptr0, i64 2
+  %ptr2 = getelementptr float, float *%ptr0, i64 4
+  %ptr3 = getelementptr float, float *%ptr0, i64 6
+  %ptr4 = getelementptr float, float *%ptr0, i64 8
+  %ptr5 = getelementptr float, float *%ptr0, i64 10
+  %ptr6 = getelementptr float, float *%ptr0, i64 12
+  %ptr7 = getelementptr float, float *%ptr0, i64 14
+  %ptr8 = getelementptr float, float *%ptr0, i64 16
+  %ptr9 = getelementptr float, float *%ptr0, i64 18
+  %ptr10 = getelementptr float, float *%ptr0, i64 20
+
+  %val0 = load float, float *%ptr0
+  %val1 = load float, float *%ptr1
+  %val2 = load float, float *%ptr2
+  %val3 = load float, float *%ptr3
+  %val4 = load float, float *%ptr4
+  %val5 = load float, float *%ptr5
+  %val6 = load float, float *%ptr6
+  %val7 = load float, float *%ptr7
+  %val8 = load float, float *%ptr8
+  %val9 = load float, float *%ptr9
+  %val10 = load float, float *%ptr10
+
+  %ret = call float @foo()
+
+  %add0 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %ret, float %val0,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add1 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add0, float %val1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add2 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add1, float %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add3 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add2, float %val3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add4 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add3, float %val4,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add5 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add4, float %val5,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add6 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add5, float %val6,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add7 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add6, float %val7,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add8 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add7, float %val8,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add9 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add8, float %val9,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add10 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add9, float %val10,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+
+  ret float %add10
+}