diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -675,10 +675,13 @@
 let TargetPrefix = "aarch64" in {
   class FPCR_Get_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects]>;
+  class FPCR_Set_Intrinsic
+    : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrNoMem, IntrHasSideEffects]>;
 }
 
 // FPCR
 def int_aarch64_get_fpcr : FPCR_Get_Intrinsic;
+def int_aarch64_set_fpcr : FPCR_Set_Intrinsic;
 
 let TargetPrefix = "aarch64" in {
   class Crypto_AES_DataKey_Intrinsic
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -449,6 +449,21 @@
 
 } // end anonymous namespace
 
+namespace AArch64 {
+/// Possible values of current rounding mode, which is specified in bits
+/// 23:22 of FPCR.
+enum Rounding {
+  RN = 0,    // Round to Nearest
+  RP = 1,    // Round towards Plus infinity
+  RM = 2,    // Round towards Minus infinity
+  RZ = 3,    // Round towards Zero
+  rmMask = 3 // Bit mask selecting rounding mode
+};
+
+// Bit position of rounding mode bits in FPCR.
+const unsigned RoundingBitsPos = 22;
+} // namespace AArch64
+
 class AArch64Subtarget;
 class AArch64TargetMachine;
 
@@ -895,6 +910,7 @@
   SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -693,6 +693,7 @@
   setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
 
   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+  setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
 
   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
   setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
@@ -3450,6 +3451,55 @@
   return DAG.getMergeValues({AND, Chain}, dl);
 }
 
+/// Lower ISD::SET_ROUNDING to a read-modify-write of FPCR that replaces only
+/// the rounding mode field and leaves every other FPCR bit unchanged.
+SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Chain = Op->getOperand(0);
+  SDValue RMValue = Op->getOperand(1);
+
+  // The rounding mode is in bits 23:22 of the FPCR.
+  // The llvm.set.rounding argument value to the rounding mode in FPCR mapping
+  // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
+  // ((arg - 1) & 3) << 22.
+  //
+  // The argument of llvm.set.rounding must be within the segment [0, 3], so
+  // NearestTiesToAway (4) is not handled here. It is the responsibility of the
+  // code that generated llvm.set.rounding to ensure this condition.
+
+  // Calculate new value of FPCR[23:22].
+  RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
+                        DAG.getConstant(1, DL, MVT::i32));
+  RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
+                        DAG.getConstant(0x3, DL, MVT::i32));
+  RMValue =
+      DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
+                  DAG.getConstant(AArch64::RoundingBitsPos, DL, MVT::i32));
+  RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RMValue);
+
+  // Get current value of FPCR.
+  SDValue Ops[] = {
+      Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
+  SDValue FPCR =
+      DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
+  Chain = FPCR.getValue(1);
+  FPCR = FPCR.getValue(0);
+
+  // Put new rounding mode into FPCR[23:22]. The clearing mask is computed in
+  // 64 bits so that bits 63:32 of FPCR are preserved; a 32-bit unsigned mask
+  // would be zero-extended to 64 bits and clear them.
+  const uint64_t RMMask =
+      ~(uint64_t(AArch64::Rounding::rmMask) << AArch64::RoundingBitsPos);
+  FPCR = DAG.getNode(ISD::AND, DL, MVT::i64, FPCR,
+                     DAG.getConstant(RMMask, DL, MVT::i64));
+  FPCR = DAG.getNode(ISD::OR, DL, MVT::i64, FPCR, RMValue);
+  SDValue Ops2[] = {
+      Chain, DAG.getTargetConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
+      FPCR};
+  return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
+}
+
 SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
 
@@ -4373,6 +4423,8 @@
     return LowerFSINCOS(Op, DAG);
   case ISD::FLT_ROUNDS_:
     return LowerFLT_ROUNDS_(Op, DAG);
+  case ISD::SET_ROUNDING:
+    return LowerSET_ROUNDING(Op, DAG);
   case ISD::MUL:
     return LowerMUL(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1247,6 +1247,7 @@
 
 // FPCR register
 def : Pat<(i64 (int_aarch64_get_fpcr)), (MRS 0xda20)>;
+def : Pat<(int_aarch64_set_fpcr i64:$val), (MSR 0xda20, GPR64:$val)>;
 
 // Generic system instructions
 def SYSxt : SystemXtI<0, "sys">;
diff --git a/llvm/test/CodeGen/AArch64/fpenv.ll b/llvm/test/CodeGen/AArch64/fpenv.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fpenv.ll
@@ -0,0 +1,71 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s
+
+define void @func_02(i32 %rm) {
+  call void @llvm.set.rounding(i32 %rm)
+  ret void
+}
+
+; CHECK-LABEL: func_02:
+; CHECK-DAG: mrs [[FPCR:x[0-9]+]], FPCR
+; CHECK-DAG: sub w{{[0-9]+}}, w0, #1
+; CHECK-DAG: and w{{[0-9]+}}, w{{[0-9]+}}, #0x3
+; CHECK-DAG: lsl w{{[0-9]+}}, w{{[0-9]+}}, #22
+; CHECK-DAG: and x{{[0-9]+}}, [[FPCR]], #0xffffffffff3fffff
+; CHECK: orr [[NEW:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK: msr FPCR, [[NEW]]
+; CHECK: ret
+
+
+define void @func_03() {
+  call void @llvm.set.rounding(i32 0)
+  ret void
+}
+
+; CHECK-LABEL: func_03
+; CHECK: mrs x8, FPCR
+; CHECK-NOT: and
+; CHECK: orr x8, x8, #0xc00000
+; CHECK: msr FPCR, x8
+; CHECK: ret
+
+
+define void @func_04() {
+  call void @llvm.set.rounding(i32 1)
+  ret void
+}
+
+; CHECK-LABEL: func_04
+; CHECK: mrs x8, FPCR
+; CHECK: and x8, x8, #0xffffffffff3fffff
+; CHECK: msr FPCR, x8
+; CHECK: ret
+
+
+define void @func_05() {
+  call void @llvm.set.rounding(i32 2)
+  ret void
+}
+
+
+; CHECK-LABEL: func_05
+; CHECK: mrs x8, FPCR
+; CHECK: and x8, x8, #0xffffffffff3fffff
+; CHECK: orr x8, x8, #0x400000
+; CHECK: msr FPCR, x8
+; CHECK: ret
+
+
+define void @func_06() {
+  call void @llvm.set.rounding(i32 3)
+  ret void
+}
+
+; CHECK-LABEL: func_06
+; CHECK: mrs x8, FPCR
+; CHECK: and x8, x8, #0xffffffffff3fffff
+; CHECK: orr x8, x8, #0x800000
+; CHECK: msr FPCR, x8
+; CHECK: ret
+
+
+declare void @llvm.set.rounding(i32)