diff --git a/llvm/lib/Target/Mips/MipsBranchExpansion.cpp b/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
--- a/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
+++ b/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
@@ -36,7 +36,7 @@
 ///
 /// Regarding compact branch hazard prevention:
 ///
-/// Hazards handled: forbidden slots for MIPSR6.
+/// Hazards handled: forbidden slots for MIPSR6, FPU slots for MIPS3 and below.
 ///
 /// A forbidden slot hazard occurs when a compact branch instruction is executed
 /// and the adjacent instruction in memory is a control transfer instruction
@@ -160,7 +160,10 @@
   bool buildProperJumpMI(MachineBasicBlock *MBB,
                          MachineBasicBlock::iterator Pos, DebugLoc DL);
   void expandToLongBranch(MBBInfo &Info);
+  template <typename Pred, typename Safe>
+  bool handleSlot(Pred Predicate, Safe SafeInSlot);
   bool handleForbiddenSlot();
+  bool handleFPUDelaySlot();
   bool handlePossibleLongBranch();
 
   const MipsSubtarget *STI;
@@ -738,30 +741,33 @@
   MBB.removeLiveIn(Mips::V0);
 }
 
-bool MipsBranchExpansion::handleForbiddenSlot() {
-  // Forbidden slot hazards are only defined for MIPSR6 but not microMIPSR6.
-  if (!STI->hasMips32r6() || STI->inMicroMipsMode())
-    return false;
-
+/// Scan every instruction of the current function and, for each one that
+/// \p Predicate accepts, look ahead to the instruction that follows it. When
+/// there is no following instruction in the function, or
+/// \p SafeInSlot(Following, Current) rejects it, the slot is fixed up.
+///
+/// \returns true if the function was changed.
+template <typename Pred, typename Safe>
+bool MipsBranchExpansion::handleSlot(Pred Predicate, Safe SafeInSlot) {
   bool Changed = false;
 
   for (MachineFunction::iterator FI = MFp->begin(); FI != MFp->end(); ++FI) {
     for (Iter I = FI->begin(); I != FI->end(); ++I) {
 
-      // Forbidden slot hazard handling. Use lookahead over state.
-      if (!TII->HasForbiddenSlot(*I))
+      // Delay slot hazard handling. Use lookahead over state.
+      if (!Predicate(*I))
         continue;
 
-      Iter Inst;
+      Iter IInSlot;
       bool LastInstInFunction =
           std::next(I) == FI->end() && std::next(FI) == MFp->end();
       if (!LastInstInFunction) {
         std::pair<Iter, bool> Res = getNextMachineInstr(std::next(I), &*FI);
         LastInstInFunction |= Res.second;
-        Inst = Res.first;
+        IInSlot = Res.first;
       }
 
-      if (LastInstInFunction || !TII->SafeInForbiddenSlot(*Inst)) {
+      if (LastInstInFunction || !SafeInSlot(*IInSlot, *I)) {
 
         MachineBasicBlock::instr_iterator Iit = I->getIterator();
         if (std::next(Iit) == FI->end() ||
@@ -778,6 +784,33 @@
   return Changed;
 }
 
+/// Handle forbidden slot hazards (MIPSR6 only, excluding microMIPSR6).
+/// \returns true if the function was changed.
+bool MipsBranchExpansion::handleForbiddenSlot() {
+  // Forbidden slot hazards are only defined for MIPSR6 but not microMIPSR6.
+  if (!STI->hasMips32r6() || STI->inMicroMipsMode())
+    return false;
+
+  return handleSlot(
+      [this](auto &I) -> bool { return TII->HasForbiddenSlot(I); },
+      [this](auto &IInSlot, auto &I) -> bool {
+        return TII->SafeInForbiddenSlot(IInSlot);
+      });
+}
+
+/// Handle FPU delay slot hazards (MIPS3 and below only).
+/// \returns true if the function was changed.
+bool MipsBranchExpansion::handleFPUDelaySlot() {
+  // FPU delay slots are only defined for MIPS3 and below.
+  if (STI->hasMips32() || STI->hasMips4())
+    return false;
+
+  return handleSlot([this](auto &I) -> bool { return TII->HasFPUDelaySlot(I); },
+                    [this](auto &IInSlot, auto &I) -> bool {
+                      return TII->SafeInFPUDelaySlot(IInSlot, I);
+                    });
+}
+
 bool MipsBranchExpansion::handlePossibleLongBranch() {
   if (STI->inMips16Mode() || !STI->enableLongBranchPass())
     return false;
@@ -857,13 +890,18 @@
   // Run these two at least once
   bool longBranchChanged = handlePossibleLongBranch();
   bool forbiddenSlotChanged = handleForbiddenSlot();
+  bool fpuDelaySlotChanged = handleFPUDelaySlot();
 
-  bool Changed = longBranchChanged || forbiddenSlotChanged;
+  bool Changed =
+      longBranchChanged || forbiddenSlotChanged || fpuDelaySlotChanged;
 
   // Then run them alternatively while there are changes
-  while (forbiddenSlotChanged) {
+  // NOPs inserted for either kind of slot can push a branch out of range, so
+  // long branches must be re-checked after any hazard fix-up.
+  while (forbiddenSlotChanged || fpuDelaySlotChanged) {
     longBranchChanged = handlePossibleLongBranch();
     if (!longBranchChanged)
       break;
     forbiddenSlotChanged = handleForbiddenSlot();
+    fpuDelaySlotChanged = handleFPUDelaySlot();
   }
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.h b/llvm/lib/Target/Mips/MipsInstrInfo.h
--- a/llvm/lib/Target/Mips/MipsInstrInfo.h
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.h
@@ -92,9 +92,16 @@
   /// Predicate to determine if an instruction can go in a forbidden slot.
   bool SafeInForbiddenSlot(const MachineInstr &MI) const;
 
+  /// Predicate to determine if an instruction can go in an FPU delay slot.
+  bool SafeInFPUDelaySlot(const MachineInstr &MIInSlot,
+                          const MachineInstr &FPUMI) const;
+
   /// Predicate to determine if an instruction has a forbidden slot.
   bool HasForbiddenSlot(const MachineInstr &MI) const;
 
+  /// Predicate to determine if an instruction has an FPU delay slot.
+  bool HasFPUDelaySlot(const MachineInstr &MI) const;
+
   /// Insert nop instruction when hazard condition is found
   void insertNoop(MachineBasicBlock &MBB,
                   MachineBasicBlock::iterator MI) const override;
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/llvm/lib/Target/Mips/MipsInstrInfo.cpp
--- a/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -568,11 +568,67 @@
   return (MI.getDesc().TSFlags & MipsII::IsCTI) == 0;
 }
 
+/// Predicate to determine if \p MIInSlot can safely be the instruction that
+/// immediately follows \p FPUMI, an instruction with an FPU delay slot.
+bool MipsInstrInfo::SafeInFPUDelaySlot(const MachineInstr &MIInSlot,
+                                       const MachineInstr &FPUMI) const {
+  // Inline assembly is opaque, so conservatively treat it as unsafe.
+  if (MIInSlot.isInlineAsm())
+    return false;
+
+  // An instruction that itself has an FPU delay slot is never allowed here.
+  if (HasFPUDelaySlot(MIInSlot))
+    return false;
+
+  // FP condition branches are never allowed in the slot.
+  switch (MIInSlot.getOpcode()) {
+  case Mips::BC1F:
+  case Mips::BC1FL:
+  case Mips::BC1T:
+  case Mips::BC1TL:
+    return false;
+  }
+
+  // The instruction in the slot must not touch anything FPUMI defines. Use the
+  // alias-aware queries: an exact register match would miss overlaps between
+  // a register and its sub-/super-registers.
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  for (const MachineOperand &Op : FPUMI.defs()) {
+    if (!Op.isReg())
+      continue;
+
+    if (MIInSlot.readsRegister(Op.getReg(), TRI) ||
+        MIInSlot.modifiesRegister(Op.getReg(), TRI))
+      return false;
+  }
+
+  return true;
+}
+
 /// Predicate for distingushing instructions that have forbidden slots.
 bool MipsInstrInfo::HasForbiddenSlot(const MachineInstr &MI) const {
   return (MI.getDesc().TSFlags & MipsII::HasForbiddenSlot) != 0;
 }
 
+/// Predicate for distinguishing instructions that have FPU delay slots.
+bool MipsInstrInfo::HasFPUDelaySlot(const MachineInstr &MI) const {
+  switch (MI.getOpcode()) {
+  case Mips::MTC1:
+  case Mips::MFC1:
+  case Mips::MTC1_D64:
+  case Mips::MFC1_D64:
+  case Mips::DMTC1:
+  case Mips::DMFC1:
+  case Mips::FCMP_S32:
+  case Mips::FCMP_D32:
+  case Mips::FCMP_D64:
+    return true;
+
+  default:
+    return false;
+  }
+}
+
 /// Return the number of bytes of code the specified instruction may be.
unsigned MipsInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { switch (MI.getOpcode()) { diff --git a/llvm/test/CodeGen/Mips/llvm-ir/select-dbl.ll b/llvm/test/CodeGen/Mips/llvm-ir/select-dbl.ll --- a/llvm/test/CodeGen/Mips/llvm-ir/select-dbl.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/select-dbl.ll @@ -203,6 +203,7 @@ ; M2-LABEL: tst_select_fcmp_olt_double: ; M2: # %bb.0: # %entry ; M2-NEXT: c.olt.d $f12, $f14 +; M2-NEXT: nop ; M2-NEXT: bc1t $BB2_2 ; M2-NEXT: mov.d $f0, $f12 ; M2-NEXT: # %bb.1: # %entry @@ -236,6 +237,7 @@ ; M3-LABEL: tst_select_fcmp_olt_double: ; M3: # %bb.0: # %entry ; M3-NEXT: c.olt.d $f12, $f13 +; M3-NEXT: nop ; M3-NEXT: bc1t .LBB2_2 ; M3-NEXT: mov.d $f0, $f12 ; M3-NEXT: # %bb.1: # %entry @@ -283,6 +285,7 @@ ; M2-LABEL: tst_select_fcmp_ole_double: ; M2: # %bb.0: # %entry ; M2-NEXT: c.ole.d $f12, $f14 +; M2-NEXT: nop ; M2-NEXT: bc1t $BB3_2 ; M2-NEXT: mov.d $f0, $f12 ; M2-NEXT: # %bb.1: # %entry @@ -316,6 +319,7 @@ ; M3-LABEL: tst_select_fcmp_ole_double: ; M3: # %bb.0: # %entry ; M3-NEXT: c.ole.d $f12, $f13 +; M3-NEXT: nop ; M3-NEXT: bc1t .LBB3_2 ; M3-NEXT: mov.d $f0, $f12 ; M3-NEXT: # %bb.1: # %entry @@ -363,6 +367,7 @@ ; M2-LABEL: tst_select_fcmp_ogt_double: ; M2: # %bb.0: # %entry ; M2-NEXT: c.ule.d $f12, $f14 +; M2-NEXT: nop ; M2-NEXT: bc1f $BB4_2 ; M2-NEXT: mov.d $f0, $f12 ; M2-NEXT: # %bb.1: # %entry @@ -396,6 +401,7 @@ ; M3-LABEL: tst_select_fcmp_ogt_double: ; M3: # %bb.0: # %entry ; M3-NEXT: c.ule.d $f12, $f13 +; M3-NEXT: nop ; M3-NEXT: bc1f .LBB4_2 ; M3-NEXT: mov.d $f0, $f12 ; M3-NEXT: # %bb.1: # %entry @@ -443,6 +449,7 @@ ; M2-LABEL: tst_select_fcmp_oge_double: ; M2: # %bb.0: # %entry ; M2-NEXT: c.ult.d $f12, $f14 +; M2-NEXT: nop ; M2-NEXT: bc1f $BB5_2 ; M2-NEXT: mov.d $f0, $f12 ; M2-NEXT: # %bb.1: # %entry @@ -476,6 +483,7 @@ ; M3-LABEL: tst_select_fcmp_oge_double: ; M3: # %bb.0: # %entry ; M3-NEXT: c.ult.d $f12, $f13 +; M3-NEXT: nop ; M3-NEXT: bc1f .LBB5_2 ; M3-NEXT: mov.d $f0, $f12 ; M3-NEXT: # %bb.1: # %entry @@ -523,6 
+531,7 @@ ; M2-LABEL: tst_select_fcmp_oeq_double: ; M2: # %bb.0: # %entry ; M2-NEXT: c.eq.d $f12, $f14 +; M2-NEXT: nop ; M2-NEXT: bc1t $BB6_2 ; M2-NEXT: mov.d $f0, $f12 ; M2-NEXT: # %bb.1: # %entry @@ -556,6 +565,7 @@ ; M3-LABEL: tst_select_fcmp_oeq_double: ; M3: # %bb.0: # %entry ; M3-NEXT: c.eq.d $f12, $f13 +; M3-NEXT: nop ; M3-NEXT: bc1t .LBB6_2 ; M3-NEXT: mov.d $f0, $f12 ; M3-NEXT: # %bb.1: # %entry @@ -603,6 +613,7 @@ ; M2-LABEL: tst_select_fcmp_one_double: ; M2: # %bb.0: # %entry ; M2-NEXT: c.ueq.d $f12, $f14 +; M2-NEXT: nop ; M2-NEXT: bc1f $BB7_2 ; M2-NEXT: mov.d $f0, $f12 ; M2-NEXT: # %bb.1: # %entry @@ -637,6 +648,7 @@ ; M3-LABEL: tst_select_fcmp_one_double: ; M3: # %bb.0: # %entry ; M3-NEXT: c.ueq.d $f12, $f13 +; M3-NEXT: nop ; M3-NEXT: bc1f .LBB7_2 ; M3-NEXT: mov.d $f0, $f12 ; M3-NEXT: # %bb.1: # %entry diff --git a/llvm/test/CodeGen/Mips/llvm-ir/select-flt.ll b/llvm/test/CodeGen/Mips/llvm-ir/select-flt.ll --- a/llvm/test/CodeGen/Mips/llvm-ir/select-flt.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/select-flt.ll @@ -189,6 +189,7 @@ ; M2-LABEL: tst_select_fcmp_olt_float: ; M2: # %bb.0: # %entry ; M2-NEXT: c.olt.s $f12, $f14 +; M2-NEXT: nop ; M2-NEXT: bc1t $BB2_2 ; M2-NEXT: mov.s $f0, $f12 ; M2-NEXT: # %bb.1: # %entry @@ -220,6 +221,7 @@ ; M3-LABEL: tst_select_fcmp_olt_float: ; M3: # %bb.0: # %entry ; M3-NEXT: c.olt.s $f12, $f13 +; M3-NEXT: nop ; M3-NEXT: bc1t .LBB2_2 ; M3-NEXT: mov.s $f0, $f12 ; M3-NEXT: # %bb.1: # %entry @@ -263,6 +265,7 @@ ; M2-LABEL: tst_select_fcmp_ole_float: ; M2: # %bb.0: # %entry ; M2-NEXT: c.ole.s $f12, $f14 +; M2-NEXT: nop ; M2-NEXT: bc1t $BB3_2 ; M2-NEXT: mov.s $f0, $f12 ; M2-NEXT: # %bb.1: # %entry @@ -294,6 +297,7 @@ ; M3-LABEL: tst_select_fcmp_ole_float: ; M3: # %bb.0: # %entry ; M3-NEXT: c.ole.s $f12, $f13 +; M3-NEXT: nop ; M3-NEXT: bc1t .LBB3_2 ; M3-NEXT: mov.s $f0, $f12 ; M3-NEXT: # %bb.1: # %entry @@ -337,6 +341,7 @@ ; M2-LABEL: tst_select_fcmp_ogt_float: ; M2: # %bb.0: # %entry ; M2-NEXT: c.ule.s $f12, $f14 +; M2-NEXT: nop ; 
M2-NEXT: bc1f $BB4_2 ; M2-NEXT: mov.s $f0, $f12 ; M2-NEXT: # %bb.1: # %entry @@ -368,6 +373,7 @@ ; M3-LABEL: tst_select_fcmp_ogt_float: ; M3: # %bb.0: # %entry ; M3-NEXT: c.ule.s $f12, $f13 +; M3-NEXT: nop ; M3-NEXT: bc1f .LBB4_2 ; M3-NEXT: mov.s $f0, $f12 ; M3-NEXT: # %bb.1: # %entry @@ -411,6 +417,7 @@ ; M2-LABEL: tst_select_fcmp_oge_float: ; M2: # %bb.0: # %entry ; M2-NEXT: c.ult.s $f12, $f14 +; M2-NEXT: nop ; M2-NEXT: bc1f $BB5_2 ; M2-NEXT: mov.s $f0, $f12 ; M2-NEXT: # %bb.1: # %entry @@ -442,6 +449,7 @@ ; M3-LABEL: tst_select_fcmp_oge_float: ; M3: # %bb.0: # %entry ; M3-NEXT: c.ult.s $f12, $f13 +; M3-NEXT: nop ; M3-NEXT: bc1f .LBB5_2 ; M3-NEXT: mov.s $f0, $f12 ; M3-NEXT: # %bb.1: # %entry @@ -485,6 +493,7 @@ ; M2-LABEL: tst_select_fcmp_oeq_float: ; M2: # %bb.0: # %entry ; M2-NEXT: c.eq.s $f12, $f14 +; M2-NEXT: nop ; M2-NEXT: bc1t $BB6_2 ; M2-NEXT: mov.s $f0, $f12 ; M2-NEXT: # %bb.1: # %entry @@ -516,6 +525,7 @@ ; M3-LABEL: tst_select_fcmp_oeq_float: ; M3: # %bb.0: # %entry ; M3-NEXT: c.eq.s $f12, $f13 +; M3-NEXT: nop ; M3-NEXT: bc1t .LBB6_2 ; M3-NEXT: mov.s $f0, $f12 ; M3-NEXT: # %bb.1: # %entry @@ -559,6 +569,7 @@ ; M2-LABEL: tst_select_fcmp_one_float: ; M2: # %bb.0: # %entry ; M2-NEXT: c.ueq.s $f12, $f14 +; M2-NEXT: nop ; M2-NEXT: bc1f $BB7_2 ; M2-NEXT: mov.s $f0, $f12 ; M2-NEXT: # %bb.1: # %entry @@ -593,6 +604,7 @@ ; M3-LABEL: tst_select_fcmp_one_float: ; M3: # %bb.0: # %entry ; M3-NEXT: c.ueq.s $f12, $f13 +; M3-NEXT: nop ; M3-NEXT: bc1f .LBB7_2 ; M3-NEXT: mov.s $f0, $f12 ; M3-NEXT: # %bb.1: # %entry