Index: lib/Target/Mips/MipsDelaySlotFiller.cpp =================================================================== --- lib/Target/Mips/MipsDelaySlotFiller.cpp +++ lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -589,12 +589,18 @@ !TII->getEquivalentCompactForm(I)) { if (searchBackward(MBB, I)) { Filled = true; - } else if (I->isTerminator()) { - if (searchSuccBBs(MBB, I)) { + // For MIPSR6, filling delay slots by searching ahead is impractical + // for codesize as it requires cloning the target instruction into the + // other paths. It can also be impractical from a performance + // perspective as a cloned memory access may not hit the cache. + } else if (!STI.hasMips32r6() || MipsCompactBranchPolicy != CB_Never){ + if (I->isTerminator()) { + if (searchSuccBBs(MBB, I)) { + Filled = true; + } + } else if (searchForward(MBB, I)) { Filled = true; } - } else if (searchForward(MBB, I)) { - Filled = true; } } @@ -709,13 +715,64 @@ return false; auto *Fn = MBB.getParent(); + const MipsSubtarget &STI = Fn->getSubtarget(); + const MipsInstrInfo *TII = STI.getInstrInfo(); RegDefsUses RegDU(*Fn->getSubtarget().getRegisterInfo()); MemDefsUses MemDU(Fn->getDataLayout(), &Fn->getFrameInfo()); + auto *TRI = Fn->getSubtarget().getRegisterInfo(); ReverseIter Filler; - + ReverseIter RI = ReverseIter(Slot); RegDU.init(*Slot); - if (!searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Slot, + // For MIPSR6 it can be beneficial to inhibit filling the delay slot of a + // CTI if it introduces a load->CTI hazard. Skip over the instruction + // covering the hazard and the load and continue the search from there. 
+ if (STI.hasMips32r6() && !STI.inMicroMipsMode() && + TII->getEquivalentCompactForm(Slot) && Slot != MBB.begin()) { + + ReverseIter Candidate = RI; + bool DFTerminates = false; + for (; Candidate != MBB.rend() && !DFTerminates; ++Candidate) { + DFTerminates = terminateSearch(*Candidate); + if (!Candidate->isDebugValue()) + break; + } + + ReverseIter Load = Candidate; + for (Load++; Load != MBB.rend() && !DFTerminates; ++Load) { + DFTerminates = terminateSearch(*Load); + if (!Load->isDebugValue()) + break; + } + + if (!DFTerminates && Candidate != MBB.rend() && Load != MBB.rend() && + Load->mayLoad()) { + BitVector BranchUses(TRI->getNumRegs()); + BitVector LoadDefs(TRI->getNumRegs()); + BitVector CandidateDefs(TRI->getNumRegs()); + + for (const MachineOperand &MO : (*Slot).uses()) + if (MO.isReg()) + BranchUses.set(MO.getReg()); + + for (const MachineOperand &MO : (*Candidate).defs()) + if (MO.isReg()) + CandidateDefs.set(MO.getReg()); + + for (const MachineOperand &MO : (*Load).defs()) + if (MO.isReg()) + LoadDefs.set(MO.getReg()); + + if (BranchUses.anyCommon(LoadDefs) && + !BranchUses.anyCommon(CandidateDefs)) { + RI = ReverseIter(std::prev(std::prev(Slot))); + delayHasHazard(*std::prev(Slot), RegDU, MemDU); + delayHasHazard(*std::prev(std::prev(Slot)), RegDU, MemDU); + } + } + } + + if (!searchRange(MBB, RI, MBB.rend(), RegDU, MemDU, Slot, Filler)) return false; Index: test/CodeGen/Mips/compactbranches/delay-slot-filler.ll =================================================================== --- /dev/null +++ test/CodeGen/Mips/compactbranches/delay-slot-filler.ll @@ -0,0 +1,39 @@ +; RUN: llc -march=mipsel -mcpu=mips32r6 -disable-mips-df-forward-search=true \ +; RUN: -disable-mips-df-succbb-search=true -disable-mips-df-backward-search \ +; RUN: < %s | FileCheck %s -check-prefix=FORWARD + +; RUN: llc -march=mipsel -mcpu=mips32r6 < %s | FileCheck %s -check-prefix=BACKWARD + +; Test the restricted backwards delay filler. 
We should get the return value +; synthesized into v0 in the delay slot of jr rather than the stack pointer +; increment. +declare void @k() + +define i32 @l9() #0 { +; CHECK-LABEL: l9 +entry: + call void @k() + +; BACKWARD: jr $ra +; BACKWARD: addiu $2, $zero, -1 + ret i32 -1 +} + +; Function Attrs: norecurse nounwind +define i32 @f(i32 signext %a, i32 signext %c, i32* nocapture %b) { +; CHECK-LABEL: f +entry: +; FORWARD: beqzc + %tobool = icmp eq i32 %c, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %add = shl nsw i32 %a, 1 + store i32 %add, i32* %b, align 4 + br label %if.end + +if.end: ; preds = %entry, %if.then + %mul = mul nsw i32 %a, 6 + ret i32 %mul +} +