diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -332,6 +332,21 @@ } // end namespace ARMISD + namespace ARM { + /// Possible values of current rounding mode, which is specified in bits + /// 23:22 of FPSCR. + enum Rounding { + RN = 0, // Round to Nearest + RP = 1, // Round towards Plus infinity + RM = 2, // Round towards Minus infinity + RZ = 3, // Round towards Zero + rmMask = 3 // Bit mask selecting rounding mode + }; + + // Bit position of rounding mode bits in FPSCR. + const unsigned RoundingBitsPos = 22; + } // namespace ARM + /// Define some predicates that are used for node matching. namespace ARM { @@ -767,6 +782,7 @@ SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1343,6 +1343,7 @@ // iff target supports vfp2. setOperationAction(ISD::BITCAST, MVT::i64, Custom); setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); + setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); } // We want to custom lower some of our intrinsics. @@ -6118,6 +6119,48 @@ return DAG.getMergeValues({And, Chain}, dl); } +SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Chain = Op->getOperand(0); + SDValue RMValue = Op->getOperand(1); + + // The rounding mode is in bits 23:22 of the FPSCR. 
+ // The llvm.set.rounding argument value to ARM rounding mode value mapping + // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is + // ((arg - 1) & 3) << 22. + // + // It is expected that the argument of llvm.set.rounding is within the + // range [0, 3], so NearestTiesToAway (4) is not handled here. It is the + // responsibility of the code that generates llvm.set.rounding to ensure + // this condition. + + // Calculate new value of FPSCR[23:22]. + RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue, + DAG.getConstant(1, DL, MVT::i32)); + RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue, + DAG.getConstant(0x3, DL, MVT::i32)); + RMValue = DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue, + DAG.getConstant(ARM::RoundingBitsPos, DL, MVT::i32)); + + // Get current value of FPSCR. + SDValue Ops[] = {Chain, + DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)}; + SDValue FPSCR = + DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops); + Chain = FPSCR.getValue(1); + FPSCR = FPSCR.getValue(0); + + // Put new rounding mode into FPSCR[23:22]. 
+ const unsigned RMMask = ~(ARM::Rounding::rmMask << ARM::RoundingBitsPos); + FPSCR = DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR, + DAG.getConstant(RMMask, DL, MVT::i32)); + FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCR, RMValue); + SDValue Ops2[] = { + Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR}; + return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2); +} + static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { SDLoc dl(N); @@ -9849,6 +9892,7 @@ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget); case ISD::TRUNCATE: return LowerTruncatei1(Op, DAG, Subtarget); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); + case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); case ISD::SDIV: if (Subtarget->isTargetWindows() && !Op.getValueType().isVector()) diff --git a/llvm/test/CodeGen/ARM/fpenv.ll b/llvm/test/CodeGen/ARM/fpenv.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/fpenv.ll @@ -0,0 +1,69 @@ +; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+vfp2 %s -o - | FileCheck %s + +define void @func_02(i32 %rm) { + call void @llvm.set.rounding(i32 %rm) + ret void +} + +; CHECK-LABEL: func_02: +; CHECK: vmrs r1, fpscr +; CHECK: sub r0, r0, #1 +; CHECK: and r0, r0, #3 +; CHECK: bic r1, r1, #12582912 +; CHECK: orr r0, r1, r0, lsl #22 +; CHECK: vmsr fpscr, r0 +; CHECK: mov pc, lr + + +define void @func_03() { + call void @llvm.set.rounding(i32 0) + ret void +} + +; CHECK-LABEL: func_03 +; CHECK: vmrs r0, fpscr +; CHECK: orr r0, r0, #12582912 +; CHECK: vmsr fpscr, r0 +; CHECK: mov pc, lr + + +define void @func_04() { + call void @llvm.set.rounding(i32 1) + ret void +} + +; CHECK-LABEL: func_04 +; CHECK: vmrs r0, fpscr +; CHECK: bic r0, r0, #12582912 +; CHECK: vmsr fpscr, r0 +; CHECK: mov pc, lr + + +define void @func_05() { + call void @llvm.set.rounding(i32 2) + ret void +} + + +; CHECK-LABEL: func_05 +; CHECK: vmrs r0, 
fpscr +; CHECK: bic r0, r0, #12582912 +; CHECK: orr r0, r0, #4194304 +; CHECK: vmsr fpscr, r0 +; CHECK: mov pc, lr + + +define void @func_06() { + call void @llvm.set.rounding(i32 3) + ret void +} + +; CHECK-LABEL: func_06 +; CHECK: vmrs r0, fpscr +; CHECK: bic r0, r0, #12582912 +; CHECK: orr r0, r0, #8388608 +; CHECK: vmsr fpscr, r0 +; CHECK: mov pc, lr + + +declare void @llvm.set.rounding(i32)