Index: lib/Target/PowerPC/PPCCTRLoops.cpp
===================================================================
--- lib/Target/PowerPC/PPCCTRLoops.cpp
+++ lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -528,6 +528,33 @@
   SmallVector<BasicBlock*, 4> ExitingBlocks;
   L->getExitingBlocks(ExitingBlocks);
 
+  // If there is an exit edge known to be frequently taken,
+  // we should not transform this loop.
+  for (BasicBlock *ExitingBB : ExitingBlocks) {
+    Instruction *TI = ExitingBB->getTerminator();
+    if (!TI) continue;
+
+    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+      uint64_t TrueWeight = 0, FalseWeight = 0;
+      if (!BI->isConditional() ||
+          !BI->extractProfMetadata(TrueWeight, FalseWeight))
+        continue;
+
+      // Compare the raw 64-bit weights directly. BranchProbability's
+      // constructor takes 32-bit operands and asserts on a zero denominator,
+      // so feeding it TrueWeight + FalseWeight could truncate the sum or
+      // trip the assertion when both weights are zero.
+      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
+      uint64_t ExitWeight = TrueIsExit ? TrueWeight : FalseWeight;
+      uint64_t LoopWeight = TrueIsExit ? FalseWeight : TrueWeight;
+
+      // If the exit path is more frequent than the loop path,
+      // we return here without further analysis for this loop.
+      if (ExitWeight > LoopWeight)
+        return MadeChange;
+    }
+  }
+
   BasicBlock *CountedExitBlock = nullptr;
   const SCEV *ExitCount = nullptr;
   BranchInst *CountedExitBranch = nullptr;
Index: test/CodeGen/PowerPC/ctrloops-hot-exit.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/ctrloops-hot-exit.ll
@@ -0,0 +1,97 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s
+
+; If there is an exit edge known to be frequently taken,
+; we should not transform this loop.
+
+; A loop having a hot exit edge: the early exit in for.body is weighted 2000:1 in favour of leaving the loop, so no mtctr/bdnz is expected.
+define signext i64 @func() {
+; CHECK: @func
+; CHECK-NOT: mtctr
+; CHECK-NOT: bdnz
+
+entry:
+  %a = alloca [1000 x i32], align 4
+  %0 = bitcast [1000 x i32]* %a to i8*
+  br label %for.body
+
+for.body:
+  %i.013 = phi i64 [ 0, %entry ], [ %inc, %if.end ]
+  %b.012 = phi i64 [ 0, %entry ], [ %xor, %if.end ]
+  %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* %a, i64 0, i64 %i.013
+  %1 = load i32, i32* %arrayidx, align 4
+  %tobool = icmp eq i32 %1, 0
+  br i1 %tobool, label %if.end, label %cleanup, !prof !1 ; false successor leaves the loop; !1 makes that edge hot
+
+if.end:
+  %xor = xor i64 %i.013, %b.012
+  %inc = add nuw nsw i64 %i.013, 1
+  %cmp = icmp ult i64 %inc, 1000
+  br i1 %cmp, label %for.body, label %cleanup ; counted back edge, no profile data attached
+
+cleanup:
+  %res = phi i64 [ %b.012, %for.body ], [ %xor, %if.end ]
+  ret i64 %res
+}
+
+; A loop having a cold exit edge: the early exit in for.body is weighted 1:2000 against leaving the loop, so mtctr/bdnz is still expected.
+define signext i64 @func2() {
+; CHECK: @func2
+; CHECK: mtctr
+; CHECK: bdnz
+
+entry:
+  %a = alloca [1000 x i32], align 4
+  %0 = bitcast [1000 x i32]* %a to i8*
+  br label %for.body
+
+for.body:
+  %i.013 = phi i64 [ 0, %entry ], [ %inc, %if.end ]
+  %b.012 = phi i64 [ 0, %entry ], [ %xor, %if.end ]
+  %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* %a, i64 0, i64 %i.013
+  %1 = load i32, i32* %arrayidx, align 4
+  %tobool = icmp eq i32 %1, 0
+  br i1 %tobool, label %if.end, label %cleanup, !prof !2 ; false successor leaves the loop; !2 makes that edge cold
+
+if.end:
+  %xor = xor i64 %i.013, %b.012
+  %inc = add nuw nsw i64 %i.013, 1
+  %cmp = icmp ult i64 %inc, 1000
+  br i1 %cmp, label %for.body, label %cleanup ; counted back edge, no profile data attached
+
+cleanup:
+  %res = phi i64 [ %b.012, %for.body ], [ %xor, %if.end ]
+  ret i64 %res
+}
+
+; A loop having an exit edge without profile data: no branch carries !prof metadata, so mtctr/bdnz is still expected.
+define signext i64 @func3() {
+; CHECK: @func3
+; CHECK: mtctr
+; CHECK: bdnz
+
+entry:
+  %a = alloca [1000 x i32], align 4
+  %0 = bitcast [1000 x i32]* %a to i8*
+  br label %for.body
+
+for.body:
+  %i.013 = phi i64 [ 0, %entry ], [ %inc, %if.end ]
+  %b.012 = phi i64 [ 0, %entry ], [ %xor, %if.end ]
+  %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* %a, i64 0, i64 %i.013
+  %1 = load i32, i32* %arrayidx, align 4
+  %tobool = icmp eq i32 %1, 0
+  br i1 %tobool, label %if.end, label %cleanup ; early exit with no !prof metadata
+
+if.end:
+  %xor = xor i64 %i.013, %b.012
+  %inc = add nuw nsw i64 %i.013, 1
+  %cmp = icmp ult i64 %inc, 1000
+  br i1 %cmp, label %for.body, label %cleanup ; counted back edge, no profile data attached
+
+cleanup:
+  %res = phi i64 [ %b.012, %for.body ], [ %xor, %if.end ]
+  ret i64 %res
+}
+
+!1 = !{!"branch_weights", i32 1, i32 2000} ; true (stay in loop) : false (exit) = 1 : 2000
+!2 = !{!"branch_weights", i32 2000, i32 1} ; true (stay in loop) : false (exit) = 2000 : 1