Index: llvm/lib/Target/Mips/MipsBranchExpansion.cpp
===================================================================
--- llvm/lib/Target/Mips/MipsBranchExpansion.cpp
+++ llvm/lib/Target/Mips/MipsBranchExpansion.cpp
@@ -36,7 +36,7 @@
 ///
 /// Regarding compact branch hazard prevention:
 ///
-/// Hazards handled: forbidden slots for MIPSR6.
+/// Hazards handled: forbidden slots for MIPSR6, FPU slots for MIPS3 and below.
 ///
 /// A forbidden slot hazard occurs when a compact branch instruction is executed
 /// and the adjacent instruction in memory is a control transfer instruction
@@ -162,6 +162,7 @@
   void expandToLongBranch(MBBInfo &Info);
   bool handleForbiddenSlot();
   bool handlePossibleLongBranch();
+  bool handleFPUDelaySlot();
 
   const MipsSubtarget *STI;
   const MipsInstrInfo *TII;
@@ -840,6 +841,53 @@
   return EverMadeChange;
 }
 
+/// Insert a NOP after every instruction that has an FPU delay slot when the
+/// instruction found by the lookahead is not safe to execute in that slot, or
+/// when the lookahead reports that it is the last instruction in the function.
+///
+/// \returns true if at least one NOP was inserted.
+bool MipsBranchExpansion::handleFPUDelaySlot() {
+  // FPU delay slots are only defined for MIPS3 and below.
+  if (STI->hasMips32() || STI->hasMips4())
+    return false;
+
+  bool Changed = false;
+
+  for (MachineFunction::iterator FI = MFp->begin(); FI != MFp->end(); ++FI) {
+    for (Iter I = FI->begin(); I != FI->end(); ++I) {
+
+      // FPU delay slot hazard handling. Use lookahead over state.
+      if (!TII->HasFPUDelaySlot(*I))
+        continue;
+
+      Iter Inst;
+      bool LastInstInFunction =
+          std::next(I) == FI->end() && std::next(FI) == MFp->end();
+      if (!LastInstInFunction) {
+        std::pair<Iter, bool> Res = getNextMachineInstr(std::next(I), &*FI);
+        LastInstInFunction |= Res.second;
+        Inst = Res.first;
+      }
+
+      if (LastInstInFunction || !TII->SafeInFPUDelaySlot(*Inst, *I)) {
+
+        // Only add a NOP if one does not already directly follow; this
+        // function may run more than once over the same machine function.
+        MachineBasicBlock::instr_iterator Iit = I->getIterator();
+        if (std::next(Iit) == FI->end() ||
+            std::next(Iit)->getOpcode() != Mips::NOP) {
+          Changed = true;
+          MIBundleBuilder(&*I).append(
+              BuildMI(*MFp, I->getDebugLoc(), TII->get(Mips::NOP)));
+          NumInsertedNops++;
+        }
+      }
+    }
+  }
+
+  return Changed;
+}
+
 bool MipsBranchExpansion::runOnMachineFunction(MachineFunction &MF) {
   const TargetMachine &TM = MF.getTarget();
   IsPIC = TM.isPositionIndependent();
@@ -857,13 +905,18 @@
   // Run these two at least once
   bool longBranchChanged = handlePossibleLongBranch();
   bool forbiddenSlotChanged = handleForbiddenSlot();
+  bool fpuDelaySlotChanged = handleFPUDelaySlot();
 
-  bool Changed = longBranchChanged || forbiddenSlotChanged;
+  bool Changed =
+      longBranchChanged || forbiddenSlotChanged || fpuDelaySlotChanged;
 
   // Then run them alternatively while there are changes
-  while (forbiddenSlotChanged) {
+  // The FPU hazard pass inserts NOPs, which grows the code, so the long-branch
+  // check has to run again after it reported a change as well.
+  while (forbiddenSlotChanged || fpuDelaySlotChanged) {
     longBranchChanged = handlePossibleLongBranch();
-    if (!longBranchChanged)
+    fpuDelaySlotChanged = handleFPUDelaySlot();
+    if (!longBranchChanged && !fpuDelaySlotChanged)
       break;
     forbiddenSlotChanged = handleForbiddenSlot();
   }
Index: llvm/lib/Target/Mips/MipsInstrInfo.h
===================================================================
--- llvm/lib/Target/Mips/MipsInstrInfo.h
+++ llvm/lib/Target/Mips/MipsInstrInfo.h
@@ -92,9 +92,18 @@
   /// Predicate to determine if an instruction can go in a forbidden slot.
   bool SafeInForbiddenSlot(const MachineInstr &MI) const;
 
+  /// Predicate to determine if an instruction can go in an FPU delay slot.
+  /// \p MIInSlot is the candidate instruction and \p FPUMI is the instruction
+  /// that owns the delay slot.
+  bool SafeInFPUDelaySlot(const MachineInstr &MIInSlot,
+                          const MachineInstr &FPUMI) const;
+
   /// Predicate to determine if an instruction has a forbidden slot.
   bool HasForbiddenSlot(const MachineInstr &MI) const;
 
+  /// Predicate to determine if an instruction has an FPU delay slot.
+  bool HasFPUDelaySlot(const MachineInstr &MI) const;
+
   /// Insert nop instruction when hazard condition is found
   void insertNoop(MachineBasicBlock &MBB,
                   MachineBasicBlock::iterator MI) const override;
Index: llvm/lib/Target/Mips/MipsInstrInfo.cpp
===================================================================
--- llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -568,11 +568,59 @@
   return (MI.getDesc().TSFlags & MipsII::IsCTI) == 0;
 }
 
+/// Predicate for distinguishing instructions that are safe to place in the
+/// FPU delay slot of \p FPUMI.
+bool MipsInstrInfo::SafeInFPUDelaySlot(const MachineInstr &MIInSlot,
+                                       const MachineInstr &FPUMI) const {
+  if (MIInSlot.isInlineAsm())
+    return false;
+
+  if (HasFPUDelaySlot(MIInSlot))
+    return false;
+
+  switch (MIInSlot.getOpcode()) {
+  case Mips::BC1F:
+  case Mips::BC1FL:
+  case Mips::BC1T:
+  case Mips::BC1TL:
+    return false;
+  }
+
+  // Reject the candidate if it reads or writes a register in FPUMI.defs().
+  for (const MachineOperand &Op : FPUMI.defs()) {
+    if (Op.isReg()) {
+      bool Reads, Writes;
+      std::tie(Reads, Writes) =
+          MIInSlot.readsWritesVirtualRegister(Op.getReg());
+
+      if (Reads || Writes)
+        return false;
+    }
+  }
+
+  return true;
+}
+
 /// Predicate for distingushing instructions that have forbidden slots.
 bool MipsInstrInfo::HasForbiddenSlot(const MachineInstr &MI) const {
   return (MI.getDesc().TSFlags & MipsII::HasForbiddenSlot) != 0;
 }
 
+/// Predicate for distinguishing instructions that have FPU delay slots.
+bool MipsInstrInfo::HasFPUDelaySlot(const MachineInstr &MI) const {
+  // NOTE(review): only the single-precision compare is listed below; confirm
+  // whether the double-precision compares need the same handling.
+  switch (MI.getOpcode()) {
+  case Mips::MTC1:
+  case Mips::MFC1:
+  case Mips::FCMP_S32:
+    return true;
+
+  default:
+    return false;
+  }
+}
+
 /// Return the number of bytes of code the specified instruction may be.
 unsigned MipsInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   switch (MI.getOpcode()) {
Index: llvm/test/CodeGen/Mips/llvm-ir/select-flt.ll
===================================================================
--- llvm/test/CodeGen/Mips/llvm-ir/select-flt.ll
+++ llvm/test/CodeGen/Mips/llvm-ir/select-flt.ll
@@ -189,6 +189,7 @@
 ; M2-LABEL: tst_select_fcmp_olt_float:
 ; M2:       # %bb.0: # %entry
 ; M2-NEXT:    c.olt.s $f12, $f14
+; M2-NEXT:    nop
 ; M2-NEXT:    bc1t $BB2_2
 ; M2-NEXT:    mov.s $f0, $f12
 ; M2-NEXT:  # %bb.1: # %entry
@@ -220,6 +221,7 @@
 ; M3-LABEL: tst_select_fcmp_olt_float:
 ; M3:       # %bb.0: # %entry
 ; M3-NEXT:    c.olt.s $f12, $f13
+; M3-NEXT:    nop
 ; M3-NEXT:    bc1t .LBB2_2
 ; M3-NEXT:    mov.s $f0, $f12
 ; M3-NEXT:  # %bb.1: # %entry
@@ -263,6 +265,7 @@
 ; M2-LABEL: tst_select_fcmp_ole_float:
 ; M2:       # %bb.0: # %entry
 ; M2-NEXT:    c.ole.s $f12, $f14
+; M2-NEXT:    nop
 ; M2-NEXT:    bc1t $BB3_2
 ; M2-NEXT:    mov.s $f0, $f12
 ; M2-NEXT:  # %bb.1: # %entry
@@ -294,6 +297,7 @@
 ; M3-LABEL: tst_select_fcmp_ole_float:
 ; M3:       # %bb.0: # %entry
 ; M3-NEXT:    c.ole.s $f12, $f13
+; M3-NEXT:    nop
 ; M3-NEXT:    bc1t .LBB3_2
 ; M3-NEXT:    mov.s $f0, $f12
 ; M3-NEXT:  # %bb.1: # %entry
@@ -337,6 +341,7 @@
 ; M2-LABEL: tst_select_fcmp_ogt_float:
 ; M2:       # %bb.0: # %entry
 ; M2-NEXT:    c.ule.s $f12, $f14
+; M2-NEXT:    nop
 ; M2-NEXT:    bc1f $BB4_2
 ; M2-NEXT:    mov.s $f0, $f12
 ; M2-NEXT:  # %bb.1: # %entry
@@ -368,6 +373,7 @@
 ; M3-LABEL: tst_select_fcmp_ogt_float:
 ; M3:       # %bb.0: # %entry
 ; M3-NEXT:    c.ule.s $f12, $f13
+; M3-NEXT:    nop
 ; M3-NEXT:    bc1f .LBB4_2
 ; M3-NEXT:    mov.s $f0, $f12
 ; M3-NEXT:  # %bb.1: # %entry
@@ -411,6 +417,7 @@
 ; M2-LABEL: tst_select_fcmp_oge_float:
 ; M2:       # %bb.0: # %entry
 ; M2-NEXT:    c.ult.s $f12, $f14
+; M2-NEXT:    nop
 ; M2-NEXT:    bc1f $BB5_2
 ; M2-NEXT:    mov.s $f0, $f12
 ; M2-NEXT:  # %bb.1: # %entry
@@ -442,6 +449,7 @@
 ; M3-LABEL: tst_select_fcmp_oge_float:
 ; M3:       # %bb.0: # %entry
 ; M3-NEXT:    c.ult.s $f12, $f13
+; M3-NEXT:    nop
 ; M3-NEXT:    bc1f .LBB5_2
 ; M3-NEXT:    mov.s $f0, $f12
 ; M3-NEXT:  # %bb.1: # %entry
@@ -485,6 +493,7 @@
 ; M2-LABEL: tst_select_fcmp_oeq_float:
 ; M2:       # %bb.0: # %entry
 ; M2-NEXT:    c.eq.s $f12, $f14
+; M2-NEXT:    nop
 ; M2-NEXT:    bc1t $BB6_2
 ; M2-NEXT:    mov.s $f0, $f12
 ; M2-NEXT:  # %bb.1: # %entry
@@ -516,6 +525,7 @@
 ; M3-LABEL: tst_select_fcmp_oeq_float:
 ; M3:       # %bb.0: # %entry
 ; M3-NEXT:    c.eq.s $f12, $f13
+; M3-NEXT:    nop
 ; M3-NEXT:    bc1t .LBB6_2
 ; M3-NEXT:    mov.s $f0, $f12
 ; M3-NEXT:  # %bb.1: # %entry
@@ -559,6 +569,7 @@
 ; M2-LABEL: tst_select_fcmp_one_float:
 ; M2:       # %bb.0: # %entry
 ; M2-NEXT:    c.ueq.s $f12, $f14
+; M2-NEXT:    nop
 ; M2-NEXT:    bc1f $BB7_2
 ; M2-NEXT:    mov.s $f0, $f12
 ; M2-NEXT:  # %bb.1: # %entry
@@ -593,6 +604,7 @@
 ; M3-LABEL: tst_select_fcmp_one_float:
 ; M3:       # %bb.0: # %entry
 ; M3-NEXT:    c.ueq.s $f12, $f13
+; M3-NEXT:    nop
 ; M3-NEXT:    bc1f .LBB7_2
 ; M3-NEXT:    mov.s $f0, $f12
 ; M3-NEXT:  # %bb.1: # %entry