diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1281,6 +1281,7 @@
                            const SDLoc &dl) const;
     SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -421,6 +421,7 @@
   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
 
   setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
+  setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
 
   // If we're enabling GP optimizations, use hardware square root
   if (!Subtarget.hasFSQRT() &&
@@ -8841,6 +8842,97 @@
   return FP;
 }
 
+/// Lower ISD::SET_ROUNDING by rewriting the two rounding-mode bits (FPSCR
+/// bits 30 and 31).
+///
+/// Operand 0 of \p Op is the incoming chain and operand 1 is the requested
+/// rounding mode in LLVM's encoding; the result is the outgoing chain.
+///
+/// A constant mode becomes an MTFSB0/MTFSB1 pair. A variable mode is mapped
+/// with x ^ (~(x >> 1) & 1), merged into the value read by MFFS, and written
+/// back with MTFSF.
+SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  SDLoc Dl(Op);
+  MachineFunction &MF = DAG.getMachineFunction();
+  EVT PtrVT = getPointerTy(MF.getDataLayout());
+  SDValue Chain = Op.getOperand(0);
+
+  // If requested mode is constant, just use simpler mtfsb.
+  if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+    uint64_t Mode = CVal->getZExtValue();
+    if (Mode >= 4)
+      llvm_unreachable("Unsupported rounding mode!");
+    unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
+    // Bit 30 takes the high bit of the Power mode and bit 31 the low bit.
+    SDNode *SetHi = DAG.getMachineNode(
+        (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
+        {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
+    SDNode *SetLo = DAG.getMachineNode(
+        (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
+        {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
+    return SDValue(SetLo, 0);
+  }
+
+  // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
+  // Only the low two bits of the requested mode are considered.
+  SDValue One = DAG.getConstant(1, Dl, MVT::i32);
+  SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
+                                DAG.getConstant(3, Dl, MVT::i32));
+  SDValue DstFlag = DAG.getNode(
+      ISD::XOR, Dl, MVT::i32, SrcFlag,
+      DAG.getNode(ISD::AND, Dl, MVT::i32,
+                  DAG.getNOT(Dl,
+                             DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
+                             MVT::i32),
+                  One));
+  // Read the current FPSCR as an f64 so the remaining bits can be preserved.
+  SDValue MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
+  Chain = MFFS.getValue(1);
+  SDValue NewFPSCR;
+  if (isTypeLegal(MVT::i64)) {
+    // Set the last two bits (rounding mode) of bitcasted FPSCR.
+    NewFPSCR = DAG.getNode(
+        ISD::OR, Dl, MVT::i64,
+        DAG.getNode(ISD::AND, Dl, MVT::i64,
+                    DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
+                    DAG.getNOT(Dl, DAG.getConstant(3, Dl, MVT::i64), MVT::i64)),
+        DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag));
+    NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
+  } else {
+    // In 32-bit mode, store f64, load and update the lower half.
+    int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
+    SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
+    Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
+    // The lower half is at offset 0 on little-endian and 4 on big-endian.
+    SDValue Addr;
+    if (Subtarget.isLittleEndian())
+      Addr = StackSlot;
+    else
+      Addr = DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
+                         DAG.getConstant(4, Dl, PtrVT));
+    SDValue Tmp = DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
+    Chain = Tmp.getValue(1);
+
+    Tmp = DAG.getNode(
+        ISD::OR, Dl, MVT::i32,
+        DAG.getNode(ISD::AND, Dl, MVT::i32, Tmp,
+                    DAG.getNOT(Dl, DAG.getConstant(3, Dl, MVT::i32), MVT::i32)),
+        DstFlag);
+
+    Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
+    NewFPSCR =
+        DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
+    Chain = NewFPSCR.getValue(1);
+  }
+  // Write the merged value back with MTFSF using field mask 255.
+  SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
+  SDNode *MTFSF = DAG.getMachineNode(
+      PPC::MTFSF, Dl, MVT::Other,
+      {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
+  return SDValue(MTFSF, 0);
+}
+
 SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
                                              SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -11591,6 +11683,8 @@
   case ISD::UINT_TO_FP:
   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
   case ISD::GET_ROUNDING:       return LowerGET_ROUNDING(Op, DAG);
+  case ISD::SET_ROUNDING:
+    return LowerSET_ROUNDING(Op, DAG);
 
   // Lower 64-bit shifts.
   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
diff --git a/llvm/test/CodeGen/PowerPC/frounds.ll b/llvm/test/CodeGen/PowerPC/frounds.ll
--- a/llvm/test/CodeGen/PowerPC/frounds.ll
+++ b/llvm/test/CodeGen/PowerPC/frounds.ll
@@ -77,4 +77,196 @@
   ret i32 %retval3
 }
 
-declare i32 @llvm.get.rounding() nounwind
+define void @setrnd_tozero() {
+; PPC32-LABEL: setrnd_tozero:
+; PPC32:       # %bb.0: # %entry
+; PPC32-NEXT:    mtfsb0 30
+; PPC32-NEXT:    mtfsb1 31
+; PPC32-NEXT:    blr
+;
+; PPC64-LABEL: setrnd_tozero:
+; PPC64:       # %bb.0: # %entry
+; PPC64-NEXT:    mtfsb0 30
+; PPC64-NEXT:    mtfsb1 31
+; PPC64-NEXT:    blr
+;
+; PPC64LE-LABEL: setrnd_tozero:
+; PPC64LE:       # %bb.0: # %entry
+; PPC64LE-NEXT:    mtfsb0 30
+; PPC64LE-NEXT:    mtfsb1 31
+; PPC64LE-NEXT:    blr
+;
+; DM-LABEL: setrnd_tozero:
+; DM:       # %bb.0: # %entry
+; DM-NEXT:    mtfsb0 30
+; DM-NEXT:    mtfsb1 31
+; DM-NEXT:    blr
+entry:
+  call void @llvm.set.rounding(i32 0)
+  ret void
+}
+
+define void @setrnd_tonearest_tieeven() {
+; PPC32-LABEL: setrnd_tonearest_tieeven:
+; PPC32:       # %bb.0: # %entry
+; PPC32-NEXT:    mtfsb0 30
+; PPC32-NEXT:    mtfsb0 31
+; PPC32-NEXT:    blr
+;
+; PPC64-LABEL: setrnd_tonearest_tieeven:
+; PPC64:       # %bb.0: # %entry
+; PPC64-NEXT:    mtfsb0 30
+; PPC64-NEXT:    mtfsb0 31
+; PPC64-NEXT:    blr
+;
+; PPC64LE-LABEL: setrnd_tonearest_tieeven:
+; PPC64LE:       # %bb.0: # %entry
+; PPC64LE-NEXT:    mtfsb0 30
+; PPC64LE-NEXT:    mtfsb0 31
+; PPC64LE-NEXT:    blr
+;
+; DM-LABEL: setrnd_tonearest_tieeven:
+; DM:       # %bb.0: # %entry
+; DM-NEXT:    mtfsb0 30
+; DM-NEXT:    mtfsb0 31
+; DM-NEXT:    blr
+entry:
+  call void @llvm.set.rounding(i32 1)
+  ret void
+}
+
+define void @setrnd_toposinf() {
+; PPC32-LABEL: setrnd_toposinf:
+; PPC32:       # %bb.0: # %entry
+; PPC32-NEXT:    mtfsb1 30
+; PPC32-NEXT:    mtfsb0 31
+; PPC32-NEXT:    blr
+;
+; PPC64-LABEL: setrnd_toposinf:
+; PPC64:       # %bb.0: # %entry
+; PPC64-NEXT:    mtfsb1 30
+; PPC64-NEXT:    mtfsb0 31
+; PPC64-NEXT:    blr
+;
+; PPC64LE-LABEL: setrnd_toposinf:
+; PPC64LE:       # %bb.0: # %entry
+; PPC64LE-NEXT:    mtfsb1 30
+; PPC64LE-NEXT:    mtfsb0 31
+; PPC64LE-NEXT:    blr
+;
+; DM-LABEL: setrnd_toposinf:
+; DM:       # %bb.0: # %entry
+; DM-NEXT:    mtfsb1 30
+; DM-NEXT:    mtfsb0 31
+; DM-NEXT:    blr
+entry:
+  call void @llvm.set.rounding(i32 2)
+  ret void
+}
+
+define void @setrnd_toneginf() {
+; PPC32-LABEL: setrnd_toneginf:
+; PPC32:       # %bb.0: # %entry
+; PPC32-NEXT:    mtfsb1 30
+; PPC32-NEXT:    mtfsb1 31
+; PPC32-NEXT:    blr
+;
+; PPC64-LABEL: setrnd_toneginf:
+; PPC64:       # %bb.0: # %entry
+; PPC64-NEXT:    mtfsb1 30
+; PPC64-NEXT:    mtfsb1 31
+; PPC64-NEXT:    blr
+;
+; PPC64LE-LABEL: setrnd_toneginf:
+; PPC64LE:       # %bb.0: # %entry
+; PPC64LE-NEXT:    mtfsb1 30
+; PPC64LE-NEXT:    mtfsb1 31
+; PPC64LE-NEXT:    blr
+;
+; DM-LABEL: setrnd_toneginf:
+; DM:       # %bb.0: # %entry
+; DM-NEXT:    mtfsb1 30
+; DM-NEXT:    mtfsb1 31
+; DM-NEXT:    blr
+entry:
+  call void @llvm.set.rounding(i32 3)
+  ret void
+}
+
+define void @setrnd_var(i32 %x) {
+; PPC32-LABEL: setrnd_var:
+; PPC32:       # %bb.0: # %entry
+; PPC32-NEXT:    stwu 1, -16(1)
+; PPC32-NEXT:    .cfi_def_cfa_offset 16
+; PPC32-NEXT:    mffs 0
+; PPC32-NEXT:    stfd 0, 8(1)
+; PPC32-NEXT:    clrlwi 4, 3, 30
+; PPC32-NEXT:    lwz 5, 12(1)
+; PPC32-NEXT:    rlwinm 3, 3, 31, 31, 31
+; PPC32-NEXT:    xor 3, 3, 4
+; PPC32-NEXT:    xori 3, 3, 1
+; PPC32-NEXT:    rlwinm 4, 5, 0, 0, 29
+; PPC32-NEXT:    rlwimi 4, 3, 0, 30, 31
+; PPC32-NEXT:    stw 4, 12(1)
+; PPC32-NEXT:    lfd 0, 8(1)
+; PPC32-NEXT:    mtfsf 255, 0
+; PPC32-NEXT:    addi 1, 1, 16
+; PPC32-NEXT:    blr
+;
+; PPC64-LABEL: setrnd_var:
+; PPC64:       # %bb.0: # %entry
+; PPC64-NEXT:    mffs 0
+; PPC64-NEXT:    stfd 0, -16(1)
+; PPC64-NEXT:    ld 5, -16(1)
+; PPC64-NEXT:    clrlwi 4, 3, 30
+; PPC64-NEXT:    rlwinm 3, 3, 31, 31, 31
+; PPC64-NEXT:    xor 3, 3, 4
+; PPC64-NEXT:    xori 3, 3, 1
+; PPC64-NEXT:    clrldi 3, 3, 32
+; PPC64-NEXT:    rldicr 4, 5, 0, 61
+; PPC64-NEXT:    or 3, 4, 3
+; PPC64-NEXT:    std 3, -8(1)
+; PPC64-NEXT:    lfd 0, -8(1)
+; PPC64-NEXT:    mtfsf 255, 0
+; PPC64-NEXT:    blr
+;
+; PPC64LE-LABEL: setrnd_var:
+; PPC64LE:       # %bb.0: # %entry
+; PPC64LE-NEXT:    mffs 0
+; PPC64LE-NEXT:    stfd 0, -16(1)
+; PPC64LE-NEXT:    clrlwi 4, 3, 30
+; PPC64LE-NEXT:    rlwinm 3, 3, 31, 31, 31
+; PPC64LE-NEXT:    xor 3, 3, 4
+; PPC64LE-NEXT:    ld 4, -16(1)
+; PPC64LE-NEXT:    xori 3, 3, 1
+; PPC64LE-NEXT:    clrldi 3, 3, 32
+; PPC64LE-NEXT:    rldicr 4, 4, 0, 61
+; PPC64LE-NEXT:    or 3, 4, 3
+; PPC64LE-NEXT:    std 3, -8(1)
+; PPC64LE-NEXT:    lfd 0, -8(1)
+; PPC64LE-NEXT:    mtfsf 255, 0
+; PPC64LE-NEXT:    blr
+;
+; DM-LABEL: setrnd_var:
+; DM:       # %bb.0: # %entry
+; DM-NEXT:    clrlwi 4, 3, 30
+; DM-NEXT:    rlwinm 3, 3, 31, 31, 31
+; DM-NEXT:    xor 3, 3, 4
+; DM-NEXT:    xori 3, 3, 1
+; DM-NEXT:    clrldi 3, 3, 32
+; DM-NEXT:    mffs 0
+; DM-NEXT:    mffprd 4, 0
+; DM-NEXT:    rldicr 4, 4, 0, 61
+; DM-NEXT:    or 3, 4, 3
+; DM-NEXT:    mtfprd 0, 3
+; DM-NEXT:    mtfsf 255, 0
+; DM-NEXT:    blr
+entry:
+  call void @llvm.set.rounding(i32 %x)
+  ret void
+}
+
+declare i32 @llvm.get.rounding() #0
+declare void @llvm.set.rounding(i32) #0
+
+attributes #0 = { nounwind }