Index: llvm/trunk/lib/CodeGen/MachinePipeliner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/MachinePipeliner.cpp +++ llvm/trunk/lib/CodeGen/MachinePipeliner.cpp @@ -2725,7 +2725,7 @@ VRMap[PrevStage - np + 1].count(Def)) PhiOp2 = VRMap[PrevStage - np + 1][Def]; // Use the loop value defined in the kernel. - else if ((unsigned)LoopValStage + StageDiffAdj > PrologStage + 1 && + else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 && VRMap[PrevStage - StageDiffAdj - np].count(LoopVal)) PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal]; // Use the value defined by the Phi, unless we're generating the first @@ -2739,35 +2739,38 @@ // references another Phi, and the other Phi is scheduled in an // earlier stage. We can try to reuse an existing Phi up until the last // stage of the current Phi. - if (LoopDefIsPhi && (int)(PrologStage - np) >= StageScheduled) { - int LVNumStages = Schedule.getStagesForPhi(LoopVal); - int StageDiff = (StageScheduled - LoopValStage); - LVNumStages -= StageDiff; - // Make sure the loop value Phi has been processed already. - if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) { - NewReg = PhiOp2; - unsigned ReuseStage = CurStageNum; - if (Schedule.isLoopCarried(this, *PhiInst)) - ReuseStage -= LVNumStages; - // Check if the Phi to reuse has been generated yet. If not, then - // there is nothing to reuse. - if (VRMap[ReuseStage - np].count(LoopVal)) { - NewReg = VRMap[ReuseStage - np][LoopVal]; + if (LoopDefIsPhi) { + if (static_cast<int>(PrologStage - np) >= StageScheduled) { + int LVNumStages = Schedule.getStagesForPhi(LoopVal); + int StageDiff = (StageScheduled - LoopValStage); + LVNumStages -= StageDiff; + // Make sure the loop value Phi has been processed already. 
+ if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) { + NewReg = PhiOp2; + unsigned ReuseStage = CurStageNum; + if (Schedule.isLoopCarried(this, *PhiInst)) + ReuseStage -= LVNumStages; + // Check if the Phi to reuse has been generated yet. If not, then + // there is nothing to reuse. + if (VRMap[ReuseStage - np].count(LoopVal)) { + NewReg = VRMap[ReuseStage - np][LoopVal]; + + rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, + &*BBI, Def, NewReg); + // Update the map with the new Phi name. + VRMap[CurStageNum - np][Def] = NewReg; + PhiOp2 = NewReg; + if (VRMap[LastStageNum - np - 1].count(LoopVal)) + PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal]; - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, - &*BBI, Def, NewReg); - // Update the map with the new Phi name. - VRMap[CurStageNum - np][Def] = NewReg; - PhiOp2 = NewReg; - if (VRMap[LastStageNum - np - 1].count(LoopVal)) - PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal]; - - if (IsLast && np == NumPhis - 1) - replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); - continue; + if (IsLast && np == NumPhis - 1) + replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); + continue; + } } - } else if (InKernel && StageDiff > 0 && - VRMap[CurStageNum - StageDiff - np].count(LoopVal)) + } + if (InKernel && StageDiff > 0 && + VRMap[CurStageNum - StageDiff - np].count(LoopVal)) PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal]; } Index: llvm/trunk/test/CodeGen/Hexagon/swp-epilog-phi11.ll =================================================================== --- llvm/trunk/test/CodeGen/Hexagon/swp-epilog-phi11.ll +++ llvm/trunk/test/CodeGen/Hexagon/swp-epilog-phi11.ll @@ -0,0 +1,33 @@ +; RUN: llc -mtriple=hexagon-unknown-elf -mcpu=hexagonv55 -hexagon-initial-cfg-cleanup=0 < %s | FileCheck %s + +; Test that the pipeliner correctly generates the operands in the +; epilog. 
+ +; CHECK: loop0 +; CHECK: r{{[0-9]+}} = sfsub([[REG0:r([0-9]+)]],[[REG1:r([0-9]+)]]) +; CHECK: endloop0 +; CHECK: r{{[0-9]+}} = sfsub([[REG0]],[[REG1]]) +; CHECK: r{{[0-9]+}} = sfsub([[REG0]],r{{[0-9]+}}) + +define dso_local void @test(i32 %m) local_unnamed_addr #0 { +entry: + %div = sdiv i32 %m, 2 + %sub = add nsw i32 %div, -1 + br label %for.body.prol + +for.body.prol: + %i.0106.prol = phi i32 [ undef, %for.body.prol ], [ %sub, %entry ] + %sr.prol = phi float [ %0, %for.body.prol ], [ undef, %entry ] + %sr109.prol = phi float [ %sr.prol, %for.body.prol ], [ undef, %entry ] + %prol.iter = phi i32 [ %prol.iter.sub, %for.body.prol ], [ undef, %entry ] + %0 = load float, float* undef, align 4 + %sub7.prol = fsub contract float %sr109.prol, %0 + store float %sub7.prol, float* null, align 4 + %prol.iter.sub = add i32 %prol.iter, -1 + %prol.iter.cmp = icmp eq i32 %prol.iter.sub, 0 + br i1 %prol.iter.cmp, label %for.body.prol.loopexit, label %for.body.prol + +for.body.prol.loopexit: + unreachable +} +