diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -685,6 +685,7 @@
   SDValue lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
   SDValue lowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
 
   bool canTreatAsByteVector(EVT VT) const;
   SDValue combineExtract(const SDLoc &DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -643,6 +643,8 @@
   setOperationAction(ISD::VACOPY, MVT::Other, Custom);
   setOperationAction(ISD::VAEND, MVT::Other, Expand);
 
+  setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
+
   // Codes for which we want to perform some z-specific combinations.
   setTargetDAGCombine({ISD::ZERO_EXTEND,
                        ISD::SIGN_EXTEND,
@@ -5806,6 +5808,8 @@
     return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
   case ISD::IS_FPCLASS:
     return lowerIS_FPCLASS(Op, DAG);
+  case ISD::GET_ROUNDING:
+    return lowerGET_ROUNDING(Op, DAG);
   default:
     llvm_unreachable("Unexpected node to lower");
   }
@@ -9035,3 +9039,43 @@
     return &SystemZ::ADDR128BitRegClass;
   return TargetLowering::getRepRegClassFor(VT);
 }
+
+// Lower ISD::GET_ROUNDING: convert the FPC rounding mode to FLT_ROUNDS form.
+//
+// The rounding method is in FPC byte 3, bits 6-7:
+//   00  Round to nearest
+//   01  Round to 0
+//   10  Round to +inf
+//   11  Round to -inf
+//
+// FLT_ROUNDS expects:
+//   -1  Undefined
+//    0  Round to 0
+//    1  Round to nearest
+//    2  Round to +inf
+//    3  Round to -inf
+//
+// Encodings 0 and 1 swap, 2 and 3 are kept: (M ^ (M >> 1)) ^ 1, M = FPC & 3.
+SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  SDValue InChain = Op.getOperand(0);
+
+  // Save FPC to register; EFPC yields the value plus an updated chain.
+  SDNode *ReadFPC =
+      DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, InChain);
+  SDValue FPCValue(ReadFPC, 0);
+  SDValue OutChain(ReadFPC, 1);
+
+  // Keep only the two rounding-mode bits.
+  SDValue Mode = DAG.getNode(ISD::AND, dl, MVT::i32, FPCValue,
+                             DAG.getConstant(3, dl, MVT::i32));
+
+  // Result = (Mode ^ (Mode >> 1)) ^ 1
+  SDValue One = DAG.getConstant(1, dl, MVT::i32);
+  SDValue Shifted = DAG.getNode(ISD::SRL, dl, MVT::i32, Mode, One);
+  SDValue Folded = DAG.getNode(ISD::XOR, dl, MVT::i32, Mode, Shifted);
+  SDValue Result = DAG.getNode(ISD::XOR, dl, MVT::i32, Folded, One);
+  Result = DAG.getZExtOrTrunc(Result, dl, Op.getValueType());
+  return DAG.getMergeValues({Result, OutChain}, dl);
+}
diff --git a/llvm/test/CodeGen/SystemZ/flt-rounds.ll b/llvm/test/CodeGen/SystemZ/flt-rounds.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/flt-rounds.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=s390x-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=s390x-linux-gnu -verify-machineinstrs -O3 < %s | FileCheck %s
+
+@changed = dso_local global i32 0, align 4
+
+define dso_local signext i32 @test_flt_rounds() nounwind {
+; CHECK-LABEL: test_flt_rounds:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    efpc %r0
+; CHECK-NEXT:    lr %r1, %r0
+; CHECK-NEXT:    nilf %r1, 3
+; CHECK-NEXT:    rxsbg %r1, %r0, 63, 63, 63
+; CHECK-NEXT:    xilf %r1, 1
+; CHECK-NEXT:    llgfr %r2, %r1
+; CHECK-NEXT:    br %r14
+  %1 = call i32 @llvm.get.rounding()
+  ret i32 %1
+}
+
+declare i32 @llvm.get.rounding() nounwind
+
+define dso_local signext i32 @test_order(i32 noundef signext %0) nounwind {
+; CHECK-LABEL: test_order:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    aghi %r15, -160
+; CHECK-NEXT:    efpc %r0
+; CHECK-NEXT:    lr %r13, %r0
+; CHECK-NEXT:    nilf %r13, 3
+; CHECK-NEXT:    rxsbg %r13, %r0, 63, 63, 63
+; CHECK-NEXT:    xilf %r13, 1
+; CHECK-NEXT:    brasl %r14, fesetround@PLT
+; CHECK-NEXT:    efpc %r0
+; CHECK-NEXT:    lr %r1, %r0
+; CHECK-NEXT:    nilf %r1, 3
+; CHECK-NEXT:    rxsbg %r1, %r0, 63, 63, 63
+; CHECK-NEXT:    xilf %r1, 1
+; CHECK-NEXT:    crje %r13, %r1, .LBB1_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lhi %r0, 1
+; CHECK-NEXT:    strl %r0, changed
+; CHECK-NEXT:  .LBB1_2:
+; CHECK-NEXT:    llgfr %r2, %r13
+; CHECK-NEXT:    lmg %r13, %r15, 264(%r15)
+; CHECK-NEXT:    br %r14
+  %2 = call i32 @llvm.get.rounding()
+  %3 = call signext i32 @fesetround(i32 noundef signext %0)
+  %4 = call i32 @llvm.get.rounding()
+  %5 = icmp ne i32 %2, %4
+  br i1 %5, label %6, label %7
+
+6:                                                ; preds = %1
+  store i32 1, ptr @changed, align 4
+  br label %7
+
+7:                                                ; preds = %6, %1
+  ret i32 %2
+}
+
+declare dso_local signext i32 @fesetround(i32 noundef signext) nounwind