diff --git a/llvm/include/llvm/CodeGen/ModuloSchedule.h b/llvm/include/llvm/CodeGen/ModuloSchedule.h --- a/llvm/include/llvm/CodeGen/ModuloSchedule.h +++ b/llvm/include/llvm/CodeGen/ModuloSchedule.h @@ -290,6 +290,9 @@ /// but not produced (in the epilog) or produced but not available (in the /// prolog). DenseMap AvailableStages; + /// When peeling the epilogue keep track of the distance between the phi + /// nodes and the kernel. + DenseMap PhiNodeLoopIteration; /// CanonicalMIs and BlockMIs form a bidirectional map between any of the /// loop kernel clones. diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -1214,7 +1214,7 @@ // Remove any dead phis in MBB. Dead phis either have only one block as input // (in which case they are the identity) or have no uses. void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI, - LiveIntervals *LIS) { + LiveIntervals *LIS, bool KeepSingleSrcPhi = false) { bool Changed = true; while (Changed) { Changed = false; @@ -1226,7 +1226,7 @@ LIS->RemoveMachineInstrFromMaps(MI); MI.eraseFromParent(); Changed = true; - } else if (MI.getNumExplicitOperands() == 3) { + } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) { MRI.constrainRegClass(MI.getOperand(1).getReg(), MRI.getRegClass(MI.getOperand(0).getReg())); MRI.replaceRegWith(MI.getOperand(0).getReg(), @@ -1657,22 +1657,32 @@ for (auto *P : PhiToDelete) P->eraseFromParent(); InsertPt = DestBB->getFirstNonPHI(); - for (MachineInstr &MI : SourceBB->phis()) { - MachineInstr *NewMI = MF.CloneMachineInstr(&MI); - DestBB->insert(InsertPt, NewMI); - Register OrigR = MI.getOperand(0).getReg(); - Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR)); - NewMI->getOperand(0).setReg(R); - NewMI->getOperand(1).setReg(OrigR); - NewMI->getOperand(2).setMBB(*DestBB->pred_begin()); - Remaps[OrigR] = R; - CanonicalMIs[NewMI] = CanonicalMIs[&MI]; - BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NewMI; - } for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) - for (MachineOperand &MO : I->uses()) - if (MO.isReg() && Remaps.count(MO.getReg())) + for (MachineOperand &MO : I->uses()) { + if (!MO.isReg()) + continue; + if (Remaps.count(MO.getReg())) MO.setReg(Remaps[MO.getReg()]); + else { + // If we are using a phi from the source block we need to add a new phi + // pointing to the old one. + MachineInstr *Use = MRI.getUniqueVRegDef(MO.getReg()); + if (Use && Use->isPHI() && Use->getParent() == SourceBB) { + MachineInstr *NewMI = MF.CloneMachineInstr(Use); + DestBB->insert(InsertPt, NewMI); + Register OrigR = Use->getOperand(0).getReg(); + Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR)); + NewMI->getOperand(0).setReg(R); + NewMI->getOperand(1).setReg(OrigR); + NewMI->getOperand(2).setMBB(*DestBB->pred_begin()); + MO.setReg(R); + Remaps[OrigR] = R; + CanonicalMIs[NewMI] = CanonicalMIs[Use]; + BlockMIs[{DestBB, CanonicalMIs[Use]}] = NewMI; + PhiNodeLoopIteration[NewMI] = PhiNodeLoopIteration[Use]; + } + } + } } void PeelingModuloScheduleExpander::peelPrologAndEpilogs() { @@ -1697,7 +1707,7 @@ // property that any value deffed in BB but used outside of BB is used by a // PHI in the exiting block. MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock(); - + EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true); // Push out the epilogs, again in reverse order. // We can't assume anything about the minumum loop trip count at this point, // so emit a fairly complex epilog. @@ -1716,7 +1726,13 @@ // instructions of a previous loop iteration. for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) { Epilogs.push_back(peelKernel(LPD_Back)); - filterInstructions(Epilogs.back(), Schedule.getNumStages() - I); + MachineBasicBlock *B = Epilogs.back(); + filterInstructions(B, Schedule.getNumStages() - I); + // Keep track at which iteration each phi belongs to. We need it to know + // what version of the variable to use during prologue/epilogue stitching. + EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true); + for (auto Phi = B->begin(), IE = B->getFirstNonPHI(); Phi != IE; ++Phi) + PhiNodeLoopIteration[&*Phi] = Schedule.getNumStages() - I; } for (size_t I = 0; I < Epilogs.size(); I++) { LS.reset(); @@ -1744,8 +1760,27 @@ for (MachineInstr &MI : (*EI)->phis()) { Register Reg = MI.getOperand(1).getReg(); MachineInstr *Use = MRI.getUniqueVRegDef(Reg); - if (Use && Use->getParent() == Pred) + if (Use && Use->getParent() == Pred) { + // If the use comes from a phi we need to skip as many phi as the + // distance between the epilogue and the kernel. Trace through the phi + // chain to find the right value. + MachineInstr *CanonicalUse = CanonicalMIs[Use]; + if (CanonicalUse->isPHI()) { + unsigned distance = PhiNodeLoopIteration[Use]; + for (unsigned I = 0; I < distance; ++I) { + assert(CanonicalUse->isPHI()); + assert(CanonicalUse->getNumOperands() == 5); + unsigned LoopRegIdx = 3, InitRegIdx = 1; + if (CanonicalUse->getOperand(2).getMBB() == + CanonicalUse->getParent()) + std::swap(LoopRegIdx, InitRegIdx); + CanonicalUse = + MRI.getVRegDef(CanonicalUse->getOperand(LoopRegIdx).getReg()); + } + Reg = CanonicalUse->getOperand(0).getReg(); + } Reg = getEquivalentRegisterIn(Reg, *PI); + } MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false)); MI.addOperand(MachineOperand::CreateMBB(*PI)); } diff --git a/llvm/test/CodeGen/Hexagon/swp-epilog-phi12.ll b/llvm/test/CodeGen/Hexagon/swp-epilog-phi12.ll --- a/llvm/test/CodeGen/Hexagon/swp-epilog-phi12.ll +++ b/llvm/test/CodeGen/Hexagon/swp-epilog-phi12.ll @@ -5,7 +5,7 @@ ; CHECK: loop0 ; CHECK: r{{[0-9]+}} = add([[REG0:r([0-9]+)]],#8) -; CHECK: [[REG0:r([0-9]+)]] = [[REG1:r([0-9]+)]] +; CHECK: [[REG0]] = [[REG1:r([0-9]+)]] ; CHECK: endloop0 ; CHECK: = add([[REG1]],#8) diff --git a/llvm/test/CodeGen/Hexagon/swp-stages4.ll b/llvm/test/CodeGen/Hexagon/swp-stages4.ll --- a/llvm/test/CodeGen/Hexagon/swp-stages4.ll +++ b/llvm/test/CodeGen/Hexagon/swp-stages4.ll @@ -5,6 +5,7 @@ ; CHECK: = and ; CHECK: = and +; CHECK: r[[REGA:[0-9]+]] = memub(r{{[0-9]+}}+#1) ; CHECK: r[[REG0:[0-9]+]] = and(r[[REG1:[0-9]+]],#255) ; CHECK-NOT: r[[REG0]] = and(r[[REG1]],#255) ; CHECK: loop0(.LBB0_[[LOOP:.]],