diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -688,6 +688,7 @@
   SDValue lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
   SDValue lowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
 
   bool canTreatAsByteVector(EVT VT) const;
   SDValue combineExtract(const SDLoc &DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -643,6 +643,8 @@
   setOperationAction(ISD::VACOPY, MVT::Other, Custom);
   setOperationAction(ISD::VAEND, MVT::Other, Expand);
 
+  setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
+
   // Codes for which we want to perform some z-specific combinations.
   setTargetDAGCombine({ISD::ZERO_EXTEND,
                        ISD::SIGN_EXTEND,
@@ -5806,6 +5808,8 @@
     return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
   case ISD::IS_FPCLASS:
     return lowerIS_FPCLASS(Op, DAG);
+  case ISD::GET_ROUNDING:
+    return lowerGET_ROUNDING(Op, DAG);
   default:
     llvm_unreachable("Unexpected node to lower");
   }
@@ -9036,3 +9040,52 @@
     return &SystemZ::ADDR128BitRegClass;
   return TargetLowering::getRepRegClassFor(VT);
 }
+
+/// Custom lowering for ISD::GET_ROUNDING: read the FPC with EFPC and convert
+/// its rounding-mode field to the FLT_ROUNDS encoding. Returns the merged
+/// {result, chain} values.
+SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  /*
+   The rounding method is in FPC Byte 3 bits 6-7, and has the following
+   settings:
+     00 Round to nearest
+     01 Round to 0
+     10 Round to +inf
+     11 Round to -inf
+
+  FLT_ROUNDS, on the other hand, expects the following:
+    -1 Undefined
+     0 Round to 0
+     1 Round to nearest
+     2 Round to +inf
+     3 Round to -inf
+  */
+
+  // Save FPC to register. EFPC takes the incoming chain and produces a new
+  // one, which is handed back to the caller alongside the result.
+  SDValue Chain = Op.getOperand(0);
+  SDValue EFPC(
+      DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
+  Chain = EFPC.getValue(1);
+
+  // Isolate the two rounding-mode bits.
+  SDValue Mode = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
+                             DAG.getConstant(3, dl, MVT::i32));
+
+  // RetVal = (Mode ^ (Mode >> 1)) ^ 1 swaps the encodings 0 and 1 and leaves
+  // 2 and 3 unchanged: 0 -> 1, 1 -> 0, 2 -> 2, 3 -> 3.
+  SDValue ModeShr = DAG.getNode(ISD::SRL, dl, MVT::i32, Mode,
+                                DAG.getConstant(1, dl, MVT::i32));
+  SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, Mode, ModeShr);
+  RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, RetVal,
+                       DAG.getConstant(1, dl, MVT::i32));
+
+  // The value above is computed in i32; convert it to the node's result type.
+  // getZExtOrTrunc picks the direction by comparing against the source width
+  // and is a no-op when the types already match.
+  RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
+
+  return DAG.getMergeValues({RetVal, Chain}, dl);
+}
diff --git a/llvm/test/CodeGen/SystemZ/flt-rounds.ll b/llvm/test/CodeGen/SystemZ/flt-rounds.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/flt-rounds.ll
@@ -0,0 +1,45 @@
+; Test that llvm.get.rounding is lowered to an EFPC instruction.
+;
+; RUN: llc -mtriple=s390x-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=s390x-linux-gnu -verify-machineinstrs -O3 < %s | FileCheck %s
+
+@changed = dso_local global i32 0, align 4
+
+define dso_local signext i32 @test_flt_rounds() nounwind {
+; CHECK-LABEL: test_flt_rounds:
+  %1 = call i32 @llvm.get.rounding()
+; CHECK: efpc %r{{[0-9]+}}
+  ret i32 %1
+}
+
+declare i32 @llvm.get.rounding() nounwind
+
+; The two reads of the rounding mode must stay on either side of the call to
+; fesetround, so EFPC has to be emitted both before and after the call.
+define dso_local signext i32 @test_order(i32 noundef signext %0) nounwind {
+; CHECK-LABEL: test_order:
+  %2 = alloca i32, align 4
+  %3 = alloca i32, align 4
+  store i32 %0, ptr %2, align 4
+  %4 = call i32 @llvm.get.rounding()
+; CHECK: efpc %r{{[0-9]+}}
+  store i32 %4, ptr %3, align 4
+  %5 = load i32, ptr %2, align 4
+  %6 = call signext i32 @fesetround(i32 noundef signext %5) #3
+; CHECK: brasl %r{{[0-9]+}}, fesetround@PLT
+  %7 = load i32, ptr %3, align 4
+  %8 = call i32 @llvm.get.rounding()
+; CHECK: efpc %r{{[0-9]+}}
+  %9 = icmp ne i32 %7, %8
+  br i1 %9, label %10, label %11
+
+10:                                               ; preds = %1
+  store i32 1, ptr @changed, align 4
+  br label %11
+
+11:                                               ; preds = %10, %1
+  %12 = load i32, ptr %3, align 4
+  ret i32 %12
+}
+
+declare dso_local signext i32 @fesetround(i32 noundef signext) nounwind