Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -47,6 +47,11 @@
   // eieio instruction
   def int_ppc_eieio : Intrinsic<[],[],[]>;
 
+  // Get content from current FPSCR register
+  def int_ppc_readflm : Intrinsic<[llvm_double_ty], [], [IntrNoMem]>;
+  // Set FPSCR register, and return previous content
+  def int_ppc_setflm : Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
+
   // Intrinsics for [double]word extended forms of divide instructions
   def int_ppc_divwe : GCCBuiltin<"__builtin_divwe">,
                       Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12114,6 +12114,20 @@
         .addReg(NewFPSCRReg)
         .addImm(0)
         .addImm(0);
+  } else if (MI.getOpcode() == PPC::SETFLM) {
+    DebugLoc Dl = MI.getDebugLoc();
+
+    // Result of setflm is previous FPSCR content, so we need to save it first.
+    Register OldFPSCRReg = MI.getOperand(0).getReg();
+    BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
+
+    // Put bits in 32:63 to FPSCR.
+    Register NewFPSCRReg = MI.getOperand(1).getReg();
+    BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
+        .addImm(255)
+        .addReg(NewFPSCRReg)
+        .addImm(0)
+        .addImm(0);
   } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
              MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
     return emitProbedAlloca(MI, BB);
Index: llvm/lib/Target/PowerPC/PPCInstrInfo.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -463,6 +463,12 @@
   // Predication support.
   bool isPredicated(const MachineInstr &MI) const override;
 
+  /// Returns true for MFFS and MTFSF so that the scheduler does not move
+  /// instructions across them; otherwise defers to TargetInstrInfo.
+  bool isSchedulingBoundary(const MachineInstr &MI,
+                            const MachineBasicBlock *MBB,
+                            const MachineFunction &MF) const override;
+
   bool PredicateInstruction(MachineInstr &MI,
                             ArrayRef<MachineOperand> Pred) const override;
 
Index: llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1644,6 +1644,17 @@
   return false;
 }
 
+bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
+                                        const MachineBasicBlock *MBB,
+                                        const MachineFunction &MF) const {
+  // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
+  // across them, since some FP operations may change content of FPSCR.
+  // TODO: Model FPSCR in PPC instruction definitions and remove the workaround
+  if (MI.getOpcode() == PPC::MFFS || MI.getOpcode() == PPC::MTFSF)
+    return true;
+  return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
+}
+
 bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
                                         ArrayRef<MachineOperand> Pred) const {
   unsigned OpC = MI.getOpcode();
Index: llvm/lib/Target/PowerPC/PPCInstrInfo.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1514,6 +1514,9 @@
 
 def SETRND : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins gprc:$in),
                     "#SETRND", [(set f64:$FRT, (int_ppc_setrnd gprc :$in))]>;
+
+def SETFLM : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FLM),
+                    "#SETFLM", [(set f64:$FRT, (int_ppc_setflm f8rc:$FLM))]>;
 }
 
 let Defs = [LR] in
@@ -3266,7 +3269,7 @@
 
 def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
           (TCRETURNri CTRRC:$dst, imm:$imm)>;
-
+def : Pat<(int_ppc_readflm), (MFFS)>;
 
 // Hi and Lo for Darwin Global Addresses.
 def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;
Index: llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
+++ llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
@@ -87,23 +87,23 @@
 ; CHECK-NEXT:    bl __gcc_qsub
 ; CHECK-NEXT:    mffs 0
 ; CHECK-NEXT:    mtfsb1 31
-; CHECK-NEXT:    lis 3, .LCPI0_1@ha
 ; CHECK-NEXT:    mtfsb0 30
 ; CHECK-NEXT:    fadd 1, 2, 1
 ; CHECK-NEXT:    mtfsf 1, 0
 ; CHECK-NEXT:    fctiwz 0, 1
-; CHECK-NEXT:    mffs 1
 ; CHECK-NEXT:    stfd 0, 160(1)
+; CHECK-NEXT:    mffs 0
 ; CHECK-NEXT:    mtfsb1 31
+; CHECK-NEXT:    lis 3, .LCPI0_1@ha
 ; CHECK-NEXT:    mtfsb0 30
-; CHECK-NEXT:    fadd 0, 28, 29
-; CHECK-NEXT:    mtfsf 1, 1
-; CHECK-NEXT:    lfs 1, .LCPI0_1@l(3)
-; CHECK-NEXT:    fctiwz 0, 0
-; CHECK-NEXT:    stfd 0, 152(1)
+; CHECK-NEXT:    fadd 1, 28, 29
+; CHECK-NEXT:    mtfsf 1, 0
+; CHECK-NEXT:    lfs 0, .LCPI0_1@l(3)
+; CHECK-NEXT:    fctiwz 1, 1
+; CHECK-NEXT:    stfd 1, 152(1)
 ; CHECK-NEXT:    fcmpu 0, 28, 27
 ; CHECK-NEXT:    lwz 3, 164(1)
-; CHECK-NEXT:    fcmpu 1, 29, 1
+; CHECK-NEXT:    fcmpu 1, 29, 0
 ; CHECK-NEXT:    lwz 4, 156(1)
 ; CHECK-NEXT:    crandc 20, 6, 0
 ; CHECK-NEXT:    cror 20, 5, 20
@@ -209,25 +209,25 @@
 ; CHECK-NEXT:    bl __gcc_qsub
 ; CHECK-NEXT:    mffs 0
 ; CHECK-NEXT:    mtfsb1 31
-; CHECK-NEXT:    lis 3, .LCPI0_2@ha
 ; CHECK-NEXT:    mtfsb0 30
 ; CHECK-NEXT:    fadd 1, 2, 1
 ; CHECK-NEXT:    mtfsf 1, 0
 ; CHECK-NEXT:    fctiwz 0, 1
-; CHECK-NEXT:    mffs 1
 ; CHECK-NEXT:    stfd 0, 32(1)
+; CHECK-NEXT:    mffs 0
 ; CHECK-NEXT:    mtfsb1 31
-; CHECK-NEXT:    lfs 0, .LCPI0_2@l(3)
-; CHECK-NEXT:    lis 3, .LCPI0_3@ha
+; CHECK-NEXT:    lis 3, .LCPI0_2@ha
+; CHECK-NEXT:    lfs 2, .LCPI0_2@l(3)
 ; CHECK-NEXT:    mtfsb0 30
-; CHECK-NEXT:    fadd 2, 28, 29
-; CHECK-NEXT:    mtfsf 1, 1
-; CHECK-NEXT:    lfs 1, .LCPI0_3@l(3)
-; CHECK-NEXT:    fctiwz 2, 2
-; CHECK-NEXT:    stfd 2, 24(1)
-; CHECK-NEXT:    fcmpu 0, 30, 0
+; CHECK-NEXT:    lis 3, .LCPI0_3@ha
+; CHECK-NEXT:    fadd 1, 28, 29
+; CHECK-NEXT:    mtfsf 1, 0
+; CHECK-NEXT:    lfs 0, .LCPI0_3@l(3)
+; CHECK-NEXT:    fctiwz 1, 1
+; CHECK-NEXT:    stfd 1, 24(1)
+; CHECK-NEXT:    fcmpu 0, 30, 2
 ; CHECK-NEXT:    lwz 3, 36(1)
-; CHECK-NEXT:    fcmpu 1, 31, 1
+; CHECK-NEXT:    fcmpu 1, 31, 0
 ; CHECK-NEXT:    lwz 4, 28(1)
 ; CHECK-NEXT:    crandc 20, 6, 1
 ; CHECK-NEXT:    cror 20, 4, 20
@@ -264,25 +264,25 @@
 ; CHECK-NEXT:    bl __gcc_qsub
 ; CHECK-NEXT:    mffs 0
 ; CHECK-NEXT:    mtfsb1 31
-; CHECK-NEXT:    lis 3, .LCPI0_0@ha
 ; CHECK-NEXT:    mtfsb0 30
 ; CHECK-NEXT:    fadd 1, 2, 1
 ; CHECK-NEXT:    mtfsf 1, 0
 ; CHECK-NEXT:    fctiwz 0, 1
-; CHECK-NEXT:    mffs 1
 ; CHECK-NEXT:    stfd 0, 96(1)
+; CHECK-NEXT:    mffs 0
 ; CHECK-NEXT:    mtfsb1 31
-; CHECK-NEXT:    lfs 0, .LCPI0_0@l(3)
-; CHECK-NEXT:    lis 3, .LCPI0_1@ha
+; CHECK-NEXT:    lis 3, .LCPI0_0@ha
+; CHECK-NEXT:    lfs 2, .LCPI0_0@l(3)
 ; CHECK-NEXT:    mtfsb0 30
-; CHECK-NEXT:    fadd 2, 30, 31
-; CHECK-NEXT:    mtfsf 1, 1
-; CHECK-NEXT:    lfs 1, .LCPI0_1@l(3)
-; CHECK-NEXT:    fctiwz 2, 2
-; CHECK-NEXT:    stfd 2, 88(1)
-; CHECK-NEXT:    fcmpu 0, 30, 0
+; CHECK-NEXT:    lis 3, .LCPI0_1@ha
+; CHECK-NEXT:    fadd 1, 30, 31
+; CHECK-NEXT:    mtfsf 1, 0
+; CHECK-NEXT:    lfs 0, .LCPI0_1@l(3)
+; CHECK-NEXT:    fctiwz 1, 1
+; CHECK-NEXT:    stfd 1, 88(1)
+; CHECK-NEXT:    fcmpu 0, 30, 2
 ; CHECK-NEXT:    lwz 3, 100(1)
-; CHECK-NEXT:    fcmpu 1, 31, 1
+; CHECK-NEXT:    fcmpu 1, 31, 0
 ; CHECK-NEXT:    lwz 4, 92(1)
 ; CHECK-NEXT:    crandc 20, 6, 0
 ; CHECK-NEXT:    cror 20, 5, 20
Index: llvm/test/CodeGen/PowerPC/read-set-flm.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/read-set-flm.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple powerpc64le-unknown-linux | FileCheck %s
+; RUN: llc < %s -mtriple powerpc64le-unknown-linux -debug-only=machine-scheduler \
+; RUN:   2>&1 | FileCheck %s --check-prefix=LOG
+; REQUIRES: asserts
+
+define double @in_nostrict(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: in_nostrict:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mffs 0
+; CHECK-NEXT:    xsdivdp 1, 1, 2
+; CHECK-NEXT:    xsadddp 1, 1, 3
+; CHECK-NEXT:    xsadddp 0, 1, 0
+; CHECK-NEXT:    mtfsf 255, 4
+; CHECK-NEXT:    xsdivdp 1, 3, 4
+; CHECK-NEXT:    xsadddp 1, 1, 2
+; CHECK-NEXT:    xsadddp 1, 0, 1
+; CHECK-NEXT:    blr
+;
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: in_nostrict:%bb.0 entry
+; LOG: ExitSU: MTFSF 255, %{{[0-9]+}}:f8rc, 0, 0
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: in_nostrict:%bb.0 entry
+; LOG: ExitSU: %{{[0-9]+}}:f8rc = MFFS implicit $rm
+;
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: in_nostrict:%bb.0 entry
+; LOG: ExitSU: MTFSF 255, renamable $f{{[0-9]+}}, 0, 0
+entry:
+  %0 = tail call double @llvm.ppc.readflm()
+  %1 = fdiv double %a, %b
+  %2 = fadd double %1, %c
+  %3 = fadd double %2, %0
+  call double @llvm.ppc.setflm(double %d)
+  %5 = fdiv double %c, %d
+  %6 = fadd double %5, %b
+  %7 = fadd double %3, %6
+  ret double %7
+}
+
+define double @in_strict(double %a, double %b, double %c, double %d) #0 {
+; CHECK-LABEL: in_strict:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mffs 0
+; CHECK-NEXT:    xsdivdp 1, 1, 2
+; CHECK-NEXT:    xsadddp 1, 1, 3
+; CHECK-NEXT:    xsadddp 0, 1, 0
+; CHECK-NEXT:    mtfsf 255, 4
+; CHECK-NEXT:    xsdivdp 1, 3, 4
+; CHECK-NEXT:    xsadddp 1, 1, 2
+; CHECK-NEXT:    xsadddp 1, 0, 1
+; CHECK-NEXT:    blr
+;
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: in_strict:%bb.0 entry
+; LOG: ExitSU: MTFSF 255, %{{[0-9]+}}:f8rc, 0, 0
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: in_strict:%bb.0 entry
+; LOG: ExitSU: %{{[0-9]+}}:f8rc = MFFS implicit $rm
+;
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: in_strict:%bb.0 entry
+; LOG: ExitSU: MTFSF 255, renamable $f{{[0-9]+}}, 0, 0
+entry:
+  %0 = tail call double @llvm.ppc.readflm()
+  %1 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  %2 = call double @llvm.experimental.constrained.fadd.f64(double %1, double %c, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  %3 = call double @llvm.experimental.constrained.fadd.f64(double %2, double %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  call double @llvm.ppc.setflm(double %d)
+  %5 = call double @llvm.experimental.constrained.fdiv.f64(double %c, double %d, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  %6 = call double @llvm.experimental.constrained.fadd.f64(double %5, double %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  %7 = call double @llvm.experimental.constrained.fadd.f64(double %3, double %6, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret double %7
+}
+
+declare double @llvm.ppc.readflm()
+declare double @llvm.ppc.setflm(double)
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
+
+attributes #0 = { strictfp }