diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp --- a/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/llvm/lib/CodeGen/HardwareLoops.cpp @@ -155,11 +155,12 @@ TargetLibraryInfo *LibInfo = nullptr; Module *M = nullptr; bool MadeChange = false; + SmallVector DeadLoops; }; class HardwareLoop { // Expand the trip count scev into a value that we can use. - Value *InitLoopCount(); + Value *InitLoopCount(SmallVector &DeadLoops); // Insert the set_loop_iteration intrinsic. Value *InsertIterationSetup(Value *LoopCountInit); @@ -191,7 +192,7 @@ UsePHICounter(Info.CounterInReg), UseLoopGuard(Info.PerformEntryTest) { } - void Create(); + void Create(SmallVector &DeadLoops); private: ScalarEvolution &SE; @@ -233,6 +234,24 @@ if (L->isOutermost()) TryConvertLoop(L); + for (Loop *L : DeadLoops) { + BasicBlock *ExitBlock = L->getUniqueExitBlock(); + assert(ExitBlock && "Dead loop should have a unique exit block!\n"); + assert(L->hasDedicatedExits() && + "Dead loop should have a dedicated exit block!\n"); + + if (!L->isRecursivelyLCSSAForm(*DT, *LI)) + formLCSSARecursively(*L, *DT, LI, SE); + + // Set incoming value to poison for phi nodes in the exit block. 
+ for (PHINode &P : ExitBlock->phis()) { + std::fill(P.incoming_values().begin(), P.incoming_values().end(), + PoisonValue::get(P.getType())); + } + deleteDeadLoop(L, DT, SE, LI); + MadeChange = true; + } + return MadeChange; } @@ -305,15 +324,15 @@ return false; HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE); - HWLoop.Create(); + HWLoop.Create(DeadLoops); ++NumHWLoops; return true; } -void HardwareLoop::Create() { +void HardwareLoop::Create(SmallVector &DeadLoops) { LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n"); - Value *LoopCountInit = InitLoopCount(); + Value *LoopCountInit = InitLoopCount(DeadLoops); if (!LoopCountInit) { reportHWLoopFailure("could not safely create a loop count expression", "HWLoopNotSafe", ORE, L); @@ -378,7 +397,7 @@ return true; } -Value *HardwareLoop::InitLoopCount() { +Value *HardwareLoop::InitLoopCount(SmallVector &DeadLoops) { LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n"); // Can we replace a conditional branch with an intrinsic that sets the // loop counter and tests that is not zero? @@ -422,6 +441,16 @@ Value *Count = SCEVE.expandCodeFor(ExitCount, CountType, BB->getTerminator()); + // Remove the loop which has an undef loop count. + // For now deleteDeadLoop() can only delete a loop which has a unique and + // dedicated exit block, so we only delete loops of that form. + if (isa(Count)) { + LLVM_DEBUG(dbgs() << " - Bailing, loop count is undef\n"); + if (L->getUniqueExitBlock() && L->hasDedicatedExits()) + DeadLoops.push_back(L); + return nullptr; + } + // FIXME: We've expanded Count where we hope to insert the counter setting // intrinsic. 
But, in the case of the 'test and set' form, we may fallback to // the just 'set' form and in which case the insertion block is most likely diff --git a/llvm/test/CodeGen/PowerPC/pr48519.ll b/llvm/test/CodeGen/PowerPC/pr48519.ll --- a/llvm/test/CodeGen/PowerPC/pr48519.ll +++ b/llvm/test/CodeGen/PowerPC/pr48519.ll @@ -143,7 +143,7 @@ ret void } -define void @func_48786() #0 { +define void @func_48786(i64 %count) #0 { ; CHECK-LABEL: func_48786: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: mfocrf r12, 32 @@ -151,10 +151,10 @@ ; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stw r12, 8(r1) ; CHECK-NEXT: stdu r1, -48(r1) -; CHECK-NEXT: ld r3, 0(r3) +; CHECK-NEXT: ld r4, 0(r3) ; CHECK-NEXT: std r30, 32(r1) # 8-byte Folded Spill -; CHECK-NEXT: # implicit-def: $x30 -; CHECK-NEXT: cmpdi r3, 0 +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: cmpdi r4, 0 ; CHECK-NEXT: crnot 4*cr2+lt, eq ; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB2_3 ; CHECK-NEXT: .p2align 4 @@ -188,10 +188,11 @@ ; ; CHECK-P9-LABEL: func_48786: ; CHECK-P9: # %bb.0: # %bb -; CHECK-P9-NEXT: ld r3, 0(r3) -; CHECK-P9-NEXT: cmpdi r3, 0 +; CHECK-P9-NEXT: ld r4, 0(r3) +; CHECK-P9-NEXT: subfic r3, r3, 1 ; CHECK-P9-NEXT: mtctr r3 ; CHECK-P9-NEXT: li r3, 0 +; CHECK-P9-NEXT: cmpdi r4, 0 ; CHECK-P9-NEXT: crnot 4*cr5+lt, eq ; CHECK-P9-NEXT: b .LBB2_2 ; CHECK-P9-NEXT: .p2align 5 @@ -218,7 +219,7 @@ br label %bb2 bb2: ; preds = %bb12, %bb - %i3 = phi i64 [ undef, %bb ], [ %i13, %bb12 ] + %i3 = phi i64 [ %count, %bb ], [ %i13, %bb12 ] br i1 undef, label %bb10, label %bb4 bb4: ; preds = %bb2 diff --git a/llvm/test/CodeGen/PowerPC/sms-iterator.ll b/llvm/test/CodeGen/PowerPC/sms-iterator.ll --- a/llvm/test/CodeGen/PowerPC/sms-iterator.ll +++ b/llvm/test/CodeGen/PowerPC/sms-iterator.ll @@ -7,7 +7,7 @@ ; CHECK: MII = 3 MAX_II = 13 (rec=3, res=2) -define dso_local fastcc double @_ZN3povL9polysolveEiPdS0_() unnamed_addr #0 { +define dso_local fastcc double @_ZN3povL9polysolveEiPdS0_(i64 %count) unnamed_addr #0 { br label %1 1: ; preds = %1, %0 @@ -19,7 +19,7 @@ 
3: ; preds = %3, %2 %4 = phi i64 [ %7, %3 ], [ undef, %2 ] %5 = phi double [ %11, %3 ], [ undef, %2 ] - %6 = phi i64 [ %12, %3 ], [ undef, %2 ] + %6 = phi i64 [ %12, %3 ], [ %count, %2 ] %7 = add nsw i64 %4, -1 %8 = fmul fast double %5, 1.000000e+07 %9 = getelementptr inbounds %0, ptr null, i64 1, i32 1, i64 %7 diff --git a/llvm/test/CodeGen/PowerPC/sms-phi-3.ll b/llvm/test/CodeGen/PowerPC/sms-phi-3.ll --- a/llvm/test/CodeGen/PowerPC/sms-phi-3.ll +++ b/llvm/test/CodeGen/PowerPC/sms-phi-3.ll @@ -5,23 +5,25 @@ %0 = type { double, double, double, i32, i32 } declare i8* @malloc() local_unnamed_addr -define void @phi3(i32*) nounwind { +define void @phi3(i32*, i64 %count) nounwind { ; CHECK-LABEL: phi3: ; CHECK: # %bb.0: ; CHECK-NEXT: mflr 0 +; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill ; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill ; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill ; CHECK-NEXT: std 0, 16(1) ; CHECK-NEXT: stdu 1, -64(1) -; CHECK-NEXT: mr 30, 3 +; CHECK-NEXT: mr 30, 4 +; CHECK-NEXT: mr 29, 3 ; CHECK-NEXT: bl malloc ; CHECK-NEXT: nop -; CHECK-NEXT: mr 29, 3 +; CHECK-NEXT: mr 28, 3 ; CHECK-NEXT: bl malloc ; CHECK-NEXT: nop -; CHECK-NEXT: addi 7, 30, -4 -; CHECK-NEXT: mtctr 3 -; CHECK-NEXT: addi 4, 29, -8 +; CHECK-NEXT: addi 7, 29, -4 +; CHECK-NEXT: mtctr 30 +; CHECK-NEXT: addi 4, 28, -8 ; CHECK-NEXT: li 5, 0 ; CHECK-NEXT: lwzu 8, 4(7) ; CHECK-NEXT: bdz .LBB0_5 @@ -38,7 +40,7 @@ ; CHECK-NEXT: lwzu 8, 4(7) ; CHECK-NEXT: bdz .LBB0_4 ; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB0_3: # +; CHECK-NEXT: .LBB0_3: ; CHECK-NEXT: add 9, 3, 6 ; CHECK-NEXT: extswsli 6, 5, 5 ; CHECK-NEXT: add 5, 8, 5 @@ -56,6 +58,7 @@ ; CHECK-NEXT: ld 0, 16(1) ; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 28, -32(1) # 8-byte Folded Reload ; CHECK-NEXT: mtlr 0 ; CHECK-NEXT: blr %2 = tail call noalias i8* @malloc() @@ -67,7 +70,7 @@ 6: ; preds = %6, %1 %7 = phi i64 [ %16, %6 ], [ 0, %1 ] %8 = phi i32 [ %15, 
%6 ], [ 0, %1 ] - %9 = phi i64 [ %17, %6 ], [ undef, %1 ] + %9 = phi i64 [ %17, %6 ], [ %count, %1 ] %10 = sext i32 %8 to i64 %11 = getelementptr inbounds %0, %0* %5, i64 %10 %12 = getelementptr inbounds %0*, %0** %3, i64 %7 diff --git a/llvm/test/Transforms/HardwareLoops/remove-loop-count-undef-loop.ll b/llvm/test/Transforms/HardwareLoops/remove-loop-count-undef-loop.ll --- a/llvm/test/Transforms/HardwareLoops/remove-loop-count-undef-loop.ll +++ b/llvm/test/Transforms/HardwareLoops/remove-loop-count-undef-loop.ll @@ -4,16 +4,10 @@ define void @foo1() #0 { ; CHECK-LABEL: @foo1( ; CHECK-NEXT: bb: -; CHECK-NEXT: call void @llvm.set.loop.iterations.i32(i32 undef) -; CHECK-NEXT: br label [[BB1:%.*]] -; CHECK: bb1: -; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[TMP2:%.*]], [[BB1]] ], [ undef, [[BB:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = sitofp i32 [[TMP0]] to double -; CHECK-NEXT: [[TMP2]] = fptosi double [[TMP1]] to i32 -; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) -; CHECK-NEXT: br i1 [[TMP3]], label [[BB1]], label [[BB8:%.*]] +; CHECK-NEXT: br label [[BB8:%.*]] ; CHECK: bb8: -; CHECK-NEXT: call void @bar(i32 [[TMP2]]) +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ poison, [[BB:%.*]] ] +; CHECK-NEXT: call void @bar(i32 [[DOTLCSSA]]) ; CHECK-NEXT: ret void ; bb: