Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1166,4 +1166,9 @@ Intrinsic<[llvm_i64_ty], [], []>; def int_ppc_cfence : Intrinsic<[], [llvm_anyint_ty], []>; + +// PowerPC set FPSCR Intrinsic Definitions. +def int_ppc_setrnd : GCCBuiltin<"__builtin_setrnd">, + Intrinsic<[llvm_double_ty], [llvm_i32_ty], []>; + } Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -10948,6 +10948,137 @@ unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); return BB; + } else if (MI.getOpcode() == PPC::SETRNDi) { + DebugLoc dl = MI.getDebugLoc(); + unsigned OldFPSCRReg = MI.getOperand(0).getReg(); + + // Save FPSCR value. + BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg); + + // The floating point rounding mode is in the bits 62:63 of FPSCR, and has + // the following settings: + // 00 Round to nearest + // 01 Round to 0 + // 10 Round to +inf + // 11 Round to -inf + + // When the operand is an immediate, use the two least significant bits of + // the immediate to set the bits 62:63 of FPSCR. + unsigned Mode = MI.getOperand(1).getImm(); + BuildMI(*BB, MI, dl, TII->get(Mode & 1 ? PPC::MTFSB1 : PPC::MTFSB0)) + .addImm(31); + + BuildMI(*BB, MI, dl, TII->get(Mode & 2 ? PPC::MTFSB1 : PPC::MTFSB0)) + .addImm(30); + } else if (MI.getOpcode() == PPC::SETRND) { + DebugLoc dl = MI.getDebugLoc(); + + // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg + // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg. 
+ // If the target doesn't have DirectMove, we should use the stack to do the + // conversion, because the target doesn't have the instructions like mtvsrd + // or mfvsrd to do this conversion directly. + auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) { + if (Subtarget.hasDirectMove()) { + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg) + .addReg(SrcReg); + } else { + // Use the stack to do the register copy. + unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD; + MachineRegisterInfo &RegInfo = F->getRegInfo(); + const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg); + if (RC == &PPC::F8RCRegClass) { + // Copy register from F8RCRegClass to G8RCRegClass. + assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) && + "Unsupported RegClass."); + + StoreOp = PPC::STFD; + LoadOp = PPC::LD; + } else { + // Copy register from G8RCRegClass to F8RCRegClass. + assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) && + (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) && + "Unsupported RegClass."); + } + + MachineFrameInfo &MFI = F->getFrameInfo(); + int FrameIdx = MFI.CreateStackObject(8, 8, false); + + MachineMemOperand *MMOStore = F->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*F, FrameIdx, 0), + MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); + + // Store the SrcReg into the stack. + BuildMI(*BB, MI, dl, TII->get(StoreOp)) + .addReg(SrcReg) + .addImm(0) + .addFrameIndex(FrameIdx) + .addMemOperand(MMOStore); + + MachineMemOperand *MMOLoad = F->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*F, FrameIdx, 0), + MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); + + // Load from the stack where SrcReg is stored, and save to DestReg, + // so we have done the RegClass conversion from RegClass::SrcReg to + // RegClass::DestReg. 
+ BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg) + .addImm(0) + .addFrameIndex(FrameIdx) + .addMemOperand(MMOLoad); + } + }; + + unsigned OldFPSCRReg = MI.getOperand(0).getReg(); + + // Save FPSCR value. + BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg); + + // When the operand is a gprc register, use the two least significant bits + // of the register and the mtfsf instruction to set bits 62:63 of FPSCR. + // + // copy OldFPSCRTmpReg, OldFPSCRReg + // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1) + // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRTmpReg, 0, 62 + // copy NewFPSCRReg, NewFPSCRTmpReg + // mtfsf 255, NewFPSCRReg + MachineOperand SrcOp = MI.getOperand(1); + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + + copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg); + + unsigned ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + unsigned ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + + // The first operand of INSERT_SUBREG should be a register which has + // subregisters; we only care about its RegClass, so we should use an + // IMPLICIT_DEF register. + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg); + BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg) + .addReg(ImDefReg) + .add(SrcOp) + .addImm(1); + + unsigned NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg) + .addReg(OldFPSCRTmpReg) + .addReg(ExtSrcReg) + .addImm(0) + .addImm(62); + + unsigned NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); + copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg); + + // The mask 255 means to put bits 32:63 of NewFPSCRReg into bits 32:63 of + // FPSCR. 
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)) + .addImm(255) + .addReg(NewFPSCRReg) + .addImm(0) + .addImm(0); } else { llvm_unreachable("Unexpected instr type to insert"); } Index: llvm/lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1310,6 +1310,15 @@ } } +// Set the floating-point rounding mode. +let Uses = [RM], Defs = [RM] in { +def SETRNDi : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins u2imm:$RND), + "#SETRNDi", [(set f64:$FRT, (int_ppc_setrnd (i32 imm:$RND)))]>; + +def SETRND : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins gprc:$in), + "#SETRND", [(set f64:$FRT, (int_ppc_setrnd gprc :$in))]>; +} + let Defs = [LR] in def MovePCtoLR : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR", []>, PPC970_Unit_BRU; Index: llvm/test/CodeGen/PowerPC/setrnd.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/setrnd.ll @@ -0,0 +1,46 @@ +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: llc -mcpu=pwr7 -mtriple=powerpc64le-unknown-linux-gnu < %s \ +; RUN: -verify-machineinstrs | FileCheck -check-prefix=CHECK-PWR7 %s + +define double @test_setrndi() { +entry: + %0 = tail call double @llvm.ppc.setrnd(i32 2) + ret double %0 + +; CHECK-LABEL: @test_setrndi +; CHECK: # %bb.0: +; CHECK-DAG: mffs 1 +; CHECK-DAG: mtfsb0 31 +; CHECK-DAG: mtfsb1 30 +; CHECK: blr +} + +define double @test_setrnd(i32 signext %x) { +entry: + %0 = tail call double @llvm.ppc.setrnd(i32 %x) + ret double %0 + +; CHECK-LABEL: @test_setrnd +; CHECK: # %bb.0: +; CHECK-DAG: mffs 1 +; CHECK-DAG: mffprd [[REG1:[0-9]+]], 1 +; CHECK-DAG: rldimi [[REG1]], 3, 0, 62 +; CHECK-DAG: mtvsrd [[REG2:[0-9]+]], [[REG1]] +; CHECK-DAG: mtfsf 255, [[REG2]] +; CHECK: blr + +; CHECK-PWR7-LABEL: @test_setrnd +; CHECK-PWR7: # %bb.0: +; CHECK-PWR7-DAG: mffs 1 +; 
CHECK-PWR7-DAG: stfd 1, -8(1) +; CHECK-PWR7-DAG: ld [[REG1:[0-9]+]], -8(1) +; CHECK-PWR7-DAG: rldimi [[REG1]], 3, 0, 62 +; CHECK-PWR7-DAG: std [[REG1]], -16(1) +; CHECK-PWR7-DAG: lfd [[REG2:[0-9]+]], -16(1) +; CHECK-PWR7-DAG: mtfsf 255, [[REG2]] +; CHECK-PWR7: blr +} + +declare double @llvm.ppc.setrnd(i32) +