Index: include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- include/llvm/IR/IntrinsicsAArch64.td
+++ include/llvm/IR/IntrinsicsAArch64.td
@@ -584,6 +584,14 @@
 def int_aarch64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic;
 
 let TargetPrefix = "aarch64" in {
+  class FPCR_Get_Intrinsic
+    : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
+}
+
+// FPCR
+def int_aarch64_get_fpcr : FPCR_Get_Intrinsic;
+
+let TargetPrefix = "aarch64" in {
   class Crypto_AES_DataKey_Intrinsic
     : Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -593,6 +593,7 @@
   SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -469,6 +469,8 @@
   setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
 
+  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+
   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
   setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
   setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
@@ -2494,6 +2496,26 @@
   return false;
 }
 
+SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  // The rounding mode is in bits 23:22 of the FPCR.
+  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
+  // The formula we use to implement this is ((FPCR + (1 << 22)) >> 22) & 3,
+  // so that the shift and mask get folded into a bitfield extract.
+  SDLoc dl(Op);
+
+  SDValue FPCR_64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i64,
+                                DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl,
+                                                MVT::i64));
+  SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
+  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
+                                  DAG.getConstant(1U << 22, dl, MVT::i32));
+  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
+                              DAG.getConstant(22, dl, MVT::i32));
+  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
+                     DAG.getConstant(3, dl, MVT::i32));
+}
+
 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
   // Multiplications are only custom-lowered for 128-bit vectors so that
   // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
@@ -2753,6 +2775,8 @@
     return LowerFP_TO_INT(Op, DAG);
   case ISD::FSINCOS:
     return LowerFSINCOS(Op, DAG);
+  case ISD::FLT_ROUNDS_:
+    return LowerFLT_ROUNDS_(Op, DAG);
   case ISD::MUL:
     return LowerMUL(Op, DAG);
   case ISD::MULHS:
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -566,6 +566,9 @@
 let Predicates = [HasPerfMon] in
 def : Pat<(readcyclecounter), (MRS 0xdce8)>;
 
+// FPCR register
+def : Pat<(i64 (int_aarch64_get_fpcr)), (MRS 0xda20)>;
+
 // Generic system instructions
 def SYSxt  : SystemXtI<0, "sys">;
 def SYSLxt : SystemLXtI<1, "sysl">;
Index: test/CodeGen/AArch64/arm64-fpcr.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/arm64-fpcr.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+define i64 @GetFpcr() {
+; CHECK-LABEL: GetFpcr
+; CHECK: mrs x0, FPCR
+; CHECK: ret
+  %1 = tail call i64 @llvm.aarch64.get.fpcr()
+  ret i64 %1
+}
+
+declare i64 @llvm.aarch64.get.fpcr()
+
+define i32 @GetFltRounds() {
+; CHECK-LABEL: GetFltRounds
+; CHECK: mrs x8, FPCR
+; CHECK: add w8, w8, #1024, lsl #12
+; CHECK: ubfx w0, w8, #22, #2
+; CHECK: ret
+  %1 = tail call i32 @llvm.flt.rounds()
+  ret i32 %1
+}
+
+declare i32 @llvm.flt.rounds()
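
For reference (not part of the patch): the lowering above converts the FPCR.RMode field (bits 23:22) to the C FLT_ROUNDS encoding with ((FPCR + (1 << 22)) >> 22) & 3, giving the mapping 0->1 (to nearest), 1->2 (toward +inf), 2->3 (toward -inf), 3->0 (toward zero). A minimal standalone C++ sketch that checks this arithmetic is below; it simulates FPCR values instead of reading the register, and the helper names makeFPCR / fltRoundsFromFPCR are illustrative only.

// Standalone illustration: verify that ((FPCR + (1 << 22)) >> 22) & 3
// maps FPCR.RMode 0,1,2,3 to FLT_ROUNDS 1,2,3,0.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Build a fake FPCR value with the given RMode in bits 23:22.
static uint32_t makeFPCR(uint32_t RMode) { return (RMode & 3u) << 22; }

static uint32_t fltRoundsFromFPCR(uint32_t FPCR) {
  // Same arithmetic the DAG nodes in LowerFLT_ROUNDS_ build:
  // add 1<<22, shift right by 22, mask to two bits.
  return ((FPCR + (1u << 22)) >> 22) & 3u;
}

int main() {
  const uint32_t Expected[4] = {1, 2, 3, 0}; // RMode 0..3 -> FLT_ROUNDS
  for (uint32_t RMode = 0; RMode < 4; ++RMode) {
    uint32_t FR = fltRoundsFromFPCR(makeFPCR(RMode));
    std::printf("RMode %u -> FLT_ROUNDS %u\n", RMode, FR);
    assert(FR == Expected[RMode]);
  }
  return 0;
}

The shift-and-mask half of this expression is what instruction selection folds into a bitfield extract, which is why the GetFltRounds test expects an add followed by a ubfx.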