Index: llvm/include/llvm/Analysis/BranchProbabilityInfo.h =================================================================== --- llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -30,6 +30,29 @@ #include <memory> #include <utility> +// Weights are for internal use only. They are used by heuristics to help to +// estimate edges' probability. Example: +// +// Using "Loop Branch Heuristics" we predict weights of edges for the +// block BB2. +// ... +// | +// V +// BB1<-+ +// | | +// | | (Weight = 124) +// V | +// BB2--+ +// | +// | (Weight = 4) +// V +// BB3 +// +// Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875 +// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125 +static constexpr uint32_t LBH_TAKEN_WEIGHT = 124; +static constexpr uint32_t LBH_NONTAKEN_WEIGHT = 4; + namespace llvm { class Function; Index: llvm/lib/Analysis/BranchProbabilityInfo.cpp =================================================================== --- llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -74,29 +74,6 @@ char BranchProbabilityInfoWrapperPass::ID = 0; -// Weights are for internal use only. They are used by heuristics to help to -// estimate edges' probability. Example: -// -// Using "Loop Branch Heuristics" we predict weights of edges for the -// block BB2. -// ... -// | -// V -// BB1<-+ -// | | -// | | (Weight = 124) -// V | -// BB2--+ -// | -// | (Weight = 4) -// V -// BB3 -// -// Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875 -// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125 -static const uint32_t LBH_TAKEN_WEIGHT = 124; -static const uint32_t LBH_NONTAKEN_WEIGHT = 4; - /// Unreachable-terminating branch taken probability. 
/// /// This is the probability for a branch being taken to a block that terminates Index: llvm/lib/Transforms/Utils/LoopRotationUtils.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -25,12 +25,14 @@ #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -611,6 +613,22 @@ if (!isa<ConstantInt>(PHBI->getCondition()) || PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) != NewHeader) { + + // Set prof metadata on the rotated conditional branch because it was a + // loop-exiting branch. This hints that the branch to the loop's preheader + // is taken with high probability. Note that we only do this when there is + // no existing prof metadata. + MDNode *WeightsNode = PHBI->getMetadata(LLVMContext::MD_prof); + if (!WeightsNode) { + uint32_t TWeight = LBH_TAKEN_WEIGHT, FWeight = LBH_NONTAKEN_WEIGHT; + if (PHBI->getSuccessor(0) == Exit) + std::swap(TWeight, FWeight); + + llvm::MDBuilder MDHelper(PHBI->getContext()); + llvm::MDNode *BrWeight = MDHelper.createBranchWeights(TWeight, FWeight); + PHBI->setMetadata(llvm::LLVMContext::MD_prof, BrWeight); + } + // The conditional branch can't be folded, handle the general case. // Split edges as necessary to preserve LoopSimplify form. 
Index: llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll +++ llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll @@ -46,32 +46,32 @@ ; CHECK-NEXT: store i32 1, i32* [[N_SPILL_ADDR_I]], align 4 ; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]]) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8** [[TMP0]] to %f.Frame** -; CHECK-NEXT: [[FRAMEPTR_I:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8, !alias.scope !0 +; CHECK-NEXT: [[FRAMEPTR_I:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8, !alias.scope !1 ; CHECK-NEXT: [[N_RELOAD_ADDR9_I:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR_I]], i64 0, i32 0 -; CHECK-NEXT: [[N_RELOAD10_I:%.*]] = load i32, i32* [[N_RELOAD_ADDR9_I]], align 4, !noalias !0 +; CHECK-NEXT: [[N_RELOAD10_I:%.*]] = load i32, i32* [[N_RELOAD_ADDR9_I]], align 4, !noalias !1 ; CHECK-NEXT: [[N_VAL3_SPILL_ADDR_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I]], i64 0, i32 1 -; CHECK-NEXT: store i32 [[N_RELOAD10_I]], i32* [[N_VAL3_SPILL_ADDR_I]], align 4, !noalias !0 +; CHECK-NEXT: store i32 [[N_RELOAD10_I]], i32* [[N_VAL3_SPILL_ADDR_I]], align 4, !noalias !1 ; CHECK-NEXT: [[INPUT_SPILL_ADDR_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I]], i64 0, i32 2 -; CHECK-NEXT: store i32 2, i32* [[INPUT_SPILL_ADDR_I]], align 4, !noalias !0 +; CHECK-NEXT: store i32 2, i32* [[INPUT_SPILL_ADDR_I]], align 4, !noalias !1 ; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) -; CHECK-NEXT: [[FRAMEPTR_I1:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8, !alias.scope !3 +; CHECK-NEXT: [[FRAMEPTR_I1:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8, !alias.scope !4 ; CHECK-NEXT: [[INPUT_RELOAD_ADDR_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I1]], i64 0, i32 2 
-; CHECK-NEXT: [[INPUT_RELOAD_I:%.*]] = load i32, i32* [[INPUT_RELOAD_ADDR_I]], align 4, !noalias !3 +; CHECK-NEXT: [[INPUT_RELOAD_I:%.*]] = load i32, i32* [[INPUT_RELOAD_ADDR_I]], align 4, !noalias !4 ; CHECK-NEXT: [[N_VAL3_RELOAD_ADDR_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I1]], i64 0, i32 1 -; CHECK-NEXT: [[N_VAL3_RELOAD_I:%.*]] = load i32, i32* [[N_VAL3_RELOAD_ADDR_I]], align 4, !noalias !3 +; CHECK-NEXT: [[N_VAL3_RELOAD_I:%.*]] = load i32, i32* [[N_VAL3_RELOAD_ADDR_I]], align 4, !noalias !4 ; CHECK-NEXT: [[SUM8_I:%.*]] = add i32 [[N_VAL3_RELOAD_I]], [[INPUT_RELOAD_I]] -; CHECK-NEXT: store i32 [[SUM8_I]], i32* [[N_VAL3_RELOAD_ADDR_I]], align 4, !noalias !3 -; CHECK-NEXT: store i32 4, i32* [[INPUT_RELOAD_ADDR_I]], align 4, !noalias !3 +; CHECK-NEXT: store i32 [[SUM8_I]], i32* [[N_VAL3_RELOAD_ADDR_I]], align 4, !noalias !4 +; CHECK-NEXT: store i32 4, i32* [[INPUT_RELOAD_ADDR_I]], align 4, !noalias !4 ; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) -; CHECK-NEXT: [[FRAMEPTR_I4:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8, !alias.scope !6 +; CHECK-NEXT: [[FRAMEPTR_I4:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8, !alias.scope !7 ; CHECK-NEXT: [[INPUT_RELOAD_ADDR13_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I4]], i64 0, i32 2 -; CHECK-NEXT: [[INPUT_RELOAD14_I:%.*]] = load i32, i32* [[INPUT_RELOAD_ADDR13_I]], align 4, !noalias !6 +; CHECK-NEXT: [[INPUT_RELOAD14_I:%.*]] = load i32, i32* [[INPUT_RELOAD_ADDR13_I]], align 4, !noalias !7 ; CHECK-NEXT: [[N_VAL3_RELOAD_ADDR11_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I4]], i64 0, i32 1 -; CHECK-NEXT: [[N_VAL3_RELOAD12_I:%.*]] = load i32, i32* [[N_VAL3_RELOAD_ADDR11_I]], align 4, !noalias !6 +; CHECK-NEXT: [[N_VAL3_RELOAD12_I:%.*]] = load i32, i32* [[N_VAL3_RELOAD_ADDR11_I]], align 4, !noalias !7 ; CHECK-NEXT: [[SUM7_I:%.*]] = add i32 [[N_VAL3_RELOAD12_I]], [[INPUT_RELOAD14_I]] -; CHECK-NEXT: 
call void @print(i32 [[SUM7_I]]), !noalias !6 +; CHECK-NEXT: call void @print(i32 [[SUM7_I]]), !noalias !7 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast %f.Frame* [[FRAMEPTR_I4]] to i8* -; CHECK-NEXT: call void @deallocate(i8* [[TMP3]]), !noalias !6 +; CHECK-NEXT: call void @deallocate(i8* [[TMP3]]), !noalias !7 ; CHECK-NEXT: ret i32 0 ; entry: Index: llvm/test/Transforms/LoopRotate/callbr.ll =================================================================== --- llvm/test/Transforms/LoopRotate/callbr.ll +++ llvm/test/Transforms/LoopRotate/callbr.ll @@ -13,7 +13,7 @@ ; CHECK-NEXT: [[TMP2:%.*]] = load i8*, i8** bitcast (i64* @d to i8**), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* @f, align 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 -; CHECK-NEXT: br i1 [[TMP4]], label [[TMP17:%.*]], label [[DOTLR_PH2:%.*]] +; CHECK-NEXT: br i1 [[TMP4]], label [[TMP17:%.*]], label [[DOTLR_PH2:%.*]], !prof !0 ; CHECK: .lr.ph2: ; CHECK-NEXT: br label [[TMP5:%.*]] ; CHECK: [[TMP6:%.*]] = phi i32 [ [[TMP3]], [[DOTLR_PH2]] ], [ [[TMP15:%.*]], [[M_EXIT:%.*]] ] @@ -27,7 +27,7 @@ ; CHECK: thread-pre-split: ; CHECK-NEXT: [[DOTPR:%.*]] = load i32, i32* @i, align 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[DOTPR]], 0 -; CHECK-NEXT: br i1 [[TMP12]], label [[M_EXIT]], label [[DOTLR_PH:%.*]] +; CHECK-NEXT: br i1 [[TMP12]], label [[M_EXIT]], label [[DOTLR_PH:%.*]], !prof !0 ; CHECK: .lr.ph: ; CHECK-NEXT: br label [[TMP13:%.*]] ; CHECK: [[DOT11:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP14:%.*]], [[J_EXIT_I:%.*]] ] Index: llvm/test/Transforms/LoopRotate/loopexitinglatch.ll =================================================================== --- llvm/test/Transforms/LoopRotate/loopexitinglatch.ll +++ llvm/test/Transforms/LoopRotate/loopexitinglatch.ll @@ -15,7 +15,7 @@ ; CHECK-NEXT: [[VAL1:%.*]] = getelementptr inbounds [[STRUCT_LIST]], %struct.List* [[DATA:%.*]], i32 0, i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[VAL1]], align 4 ; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 
[[TMP1]], [[TMP2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[IF_THEN_LR_PH:%.*]], label [[IF_ELSE6:%.*]] +; CHECK-NEXT: br i1 [[CMP3]], label [[IF_THEN_LR_PH:%.*]], label [[IF_ELSE6:%.*]], !prof !0 ; CHECK: if.then.lr.ph: ; CHECK-NEXT: br label [[IF_THEN:%.*]] ; CHECK: for.cond: @@ -121,7 +121,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[L:%.*]], align 4 ; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp eq i32 [[TMP0]], 0 -; CHECK-NEXT: br i1 [[TOBOOL2]], label [[CLEANUP:%.*]], label [[DO_COND_LR_PH:%.*]] +; CHECK-NEXT: br i1 [[TOBOOL2]], label [[CLEANUP:%.*]], label [[DO_COND_LR_PH:%.*]], !prof !1 ; CHECK: do.cond.lr.ph: ; CHECK-NEXT: br label [[DO_COND:%.*]] ; CHECK: do.body: Index: llvm/test/Transforms/LoopSimplify/merge-exits.ll =================================================================== --- llvm/test/Transforms/LoopSimplify/merge-exits.ll +++ llvm/test/Transforms/LoopSimplify/merge-exits.ll @@ -14,7 +14,7 @@ ; CHECK-NEXT: [[T0:%.*]] = load float, float* [[PEAKWEIGHT:%.*]], align 4 ; CHECK-NEXT: [[T11:%.*]] = add i32 [[BANDEDGEINDEX:%.*]], -1 ; CHECK-NEXT: [[T121:%.*]] = icmp sgt i32 [[T11]], 0 -; CHECK-NEXT: br i1 [[T121]], label [[BB_LR_PH:%.*]], label [[BB3:%.*]] +; CHECK-NEXT: br i1 [[T121]], label [[BB_LR_PH:%.*]], label [[BB3:%.*]], !prof !0 ; CHECK: bb.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[T11]] to i64 ; CHECK-NEXT: br label [[BB:%.*]] @@ -85,7 +85,7 @@ ; CHECK-NEXT: [[T0:%.*]] = load float, float* [[PEAKWEIGHT:%.*]], align 4 ; CHECK-NEXT: [[T11:%.*]] = add i32 [[BANDEDGEINDEX:%.*]], -1 ; CHECK-NEXT: [[T121:%.*]] = icmp sgt i32 [[T11]], 0 -; CHECK-NEXT: br i1 [[T121]], label [[BB_LR_PH:%.*]], label [[BB3:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[T121]], label [[BB_LR_PH:%.*]], label [[BB3:%.*]], !prof !1 ; CHECK: bb.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[T11]] to i64 ; CHECK-NEXT: br label [[BB:%.*]] @@ -103,7 +103,7 @@ ; CHECK-NEXT: [[T10:%.*]] = fcmp olt float [[T4]], 2.500000e+00 ; CHECK-NEXT: [[T12:%.*]] = icmp sgt 
i64 [[TMP0]], [[INDVARS_IV_NEXT]] ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[T10]], i1 [[T12]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[BB]], label [[BB1_BB3_CRIT_EDGE:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[OR_COND]], label [[BB]], label [[BB1_BB3_CRIT_EDGE:%.*]], !prof !1 ; CHECK: bb1.bb3_crit_edge: ; CHECK-NEXT: [[T4_LCSSA:%.*]] = phi float [ [[T4]], [[BB]] ] ; CHECK-NEXT: [[T9_LCSSA:%.*]] = phi float [ [[T9]], [[BB]] ] Index: llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll +++ llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll @@ -10,7 +10,7 @@ ; CHECK-A55-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, i16* [[ARG_3:%.*]], i64 undef ; CHECK-A55-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[ARG_1:%.*]], i64 undef ; CHECK-A55-NEXT: [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0 -; CHECK-A55-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6_PREHEADER:%.*]] +; CHECK-A55-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6_PREHEADER:%.*]], !prof !0 ; CHECK-A55: for.body6.preheader: ; CHECK-A55-NEXT: [[TMP0:%.*]] = add i32 [[ARG_0]], -1 ; CHECK-A55-NEXT: [[XTRAITER:%.*]] = and i32 [[ARG_0]], 3 @@ -97,7 +97,7 @@ ; CHECK-GENERIC-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, i16* [[ARG_3:%.*]], i64 undef ; CHECK-GENERIC-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[ARG_1:%.*]], i64 undef ; CHECK-GENERIC-NEXT: [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0 -; CHECK-GENERIC-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6:%.*]] +; CHECK-GENERIC-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6:%.*]], !prof !0 ; CHECK-GENERIC: for.body6: ; CHECK-GENERIC-NEXT: [[K_03:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY6]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-GENERIC-NEXT: [[TMP0:%.*]] = load 
i16, i16* [[ARRAYIDX10]], align 2 Index: llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll +++ llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll @@ -92,7 +92,7 @@ ; CHECK-NEXT: [[CONV6:%.*]] = zext i32 [[I:%.*]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [225 x double]* [[B:%.*]] to <225 x double>* ; CHECK-NEXT: [[CMP212_NOT:%.*]] = icmp eq i32 [[I]], 0 -; CHECK-NEXT: br i1 [[CMP212_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US:%.*]] +; CHECK-NEXT: br i1 [[CMP212_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US:%.*]], !prof !0 ; CHECK: for.cond1.preheader.us: ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[I]], 225 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP2]]) Index: llvm/test/Transforms/PhaseOrdering/ARM/arm_add_q7.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/ARM/arm_add_q7.ll +++ llvm/test/Transforms/PhaseOrdering/ARM/arm_add_q7.ll @@ -12,7 +12,7 @@ ; CHECK-LABEL: @arm_add_q7( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP_NOT3:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP_NOT3]], label [[WHILE_END:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[CMP_NOT3]], label [[WHILE_END:%.*]], label [[VECTOR_PH:%.*]], !prof !0 ; CHECK: vector.ph: ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[BLOCKSIZE]], 15 ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -16 Index: llvm/test/Transforms/PhaseOrdering/ARM/arm_fill_q7.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/ARM/arm_fill_q7.ll +++ llvm/test/Transforms/PhaseOrdering/ARM/arm_fill_q7.ll @@ -13,7 +13,7 @@ ; OLDPM-LABEL: @arm_fill_q7( ; OLDPM-NEXT: entry: ; OLDPM-NEXT: [[CMP_NOT20:%.*]] = icmp ult i32 [[BLOCKSIZE:%.*]], 4 -; OLDPM-NEXT: br i1 
[[CMP_NOT20]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] +; OLDPM-NEXT: br i1 [[CMP_NOT20]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]], !prof !3 ; OLDPM: while.body.preheader: ; OLDPM-NEXT: [[TMP0:%.*]] = and i32 [[BLOCKSIZE]], -4 ; OLDPM-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 [[PDST:%.*]], i8 [[VALUE:%.*]], i32 [[TMP0]], i1 false) @@ -33,7 +33,7 @@ ; NEWPM-LABEL: @arm_fill_q7( ; NEWPM-NEXT: entry: ; NEWPM-NEXT: [[CMP_NOT17:%.*]] = icmp ult i32 [[BLOCKSIZE:%.*]], 4 -; NEWPM-NEXT: br i1 [[CMP_NOT17]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] +; NEWPM-NEXT: br i1 [[CMP_NOT17]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]], !prof !3 ; NEWPM: while.body.preheader: ; NEWPM-NEXT: [[TMP0:%.*]] = and i32 [[BLOCKSIZE]], -4 ; NEWPM-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 [[PDST:%.*]], i8 [[VALUE:%.*]], i32 [[TMP0]], i1 false) Index: llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll +++ llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll @@ -12,7 +12,7 @@ ; CHECK-LABEL: @arm_mult_q15( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP_NOT3:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP_NOT3]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] +; CHECK-NEXT: br i1 [[CMP_NOT3]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]], !prof !0 ; CHECK: while.body.preheader: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[WHILE_BODY_PREHEADER17:%.*]], label [[VECTOR_PH:%.*]] Index: llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll +++ llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll @@ -23,7 +23,7 @@ ; CHECK-LABEL: @ctlz_loop_with_abs( 
; CHECK-NEXT: entry: ; CHECK-NEXT: [[TOBOOL_NOT1:%.*]] = icmp eq i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_NOT1]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] +; CHECK-NEXT: br i1 [[TOBOOL_NOT1]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]], !prof !0 ; CHECK: while.body.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true) ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] Index: llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll +++ llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll @@ -22,7 +22,7 @@ ; CHECK-LABEL: @loop_or( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[S:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]], !prof !0 ; CHECK: for.body.preheader: ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[S]] to i64 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[S]], 8 Index: llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll +++ llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll @@ -11,7 +11,7 @@ ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[I11_NOT1:%.*]] = icmp eq i32* [[START:%.*]], [[END:%.*]] -; CHECK-NEXT: br i1 [[I11_NOT1]], label [[EXIT:%.*]], label [[BB12:%.*]] +; CHECK-NEXT: br i1 [[I11_NOT1]], label [[EXIT:%.*]], label [[BB12:%.*]], !prof !0 ; CHECK: bb12: ; CHECK-NEXT: [[PTR2:%.*]] = phi i32* [ [[PTR_NEXT:%.*]], [[LATCH:%.*]] ], [ [[START]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[PTR2]], align 4 Index: llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll 
=================================================================== --- llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll +++ llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll @@ -19,7 +19,7 @@ ; OLDPM-NEXT: [[SIZE4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 1 ; OLDPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE4_I]], align 8, !tbaa [[TBAA6:![0-9]+]] ; OLDPM-NEXT: [[CMP510_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 -; OLDPM-NEXT: br i1 [[CMP510_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] +; OLDPM-NEXT: br i1 [[CMP510_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]], !prof !8 ; OLDPM: for.body7.lr.ph.i: ; OLDPM-NEXT: [[BASE_I4_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0 ; OLDPM-NEXT: [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0 @@ -50,7 +50,7 @@ ; NEWPM-NEXT: [[SIZE4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 1 ; NEWPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE4_I]], align 8, !tbaa [[TBAA6:![0-9]+]] ; NEWPM-NEXT: [[CMP510_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 -; NEWPM-NEXT: br i1 [[CMP510_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] +; NEWPM-NEXT: br i1 [[CMP510_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]], !prof !8 ; NEWPM: for.body7.lr.ph.i: ; NEWPM-NEXT: [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0 ; NEWPM-NEXT: [[BASE_I4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0 Index: llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll 
=================================================================== --- llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll +++ llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll @@ -15,7 +15,7 @@ ; CHECK-LABEL: @vdiv( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]], !prof !3 ; CHECK: for.body.preheader: ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 16 @@ -46,32 +46,32 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[TMP4]] to <4 x double>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP5]], align 8, !tbaa [[TBAA3:![0-9]+]], !alias.scope !7 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP5]], align 8, !tbaa [[TBAA3:![0-9]+]], !alias.scope !8 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 4 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>* -; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8, !tbaa [[TBAA3]], !alias.scope !7 +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8, !tbaa [[TBAA3]], !alias.scope !8 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 8 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[TMP8]] to <4 x double>* -; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x double>, <4 x double>* [[TMP9]], align 8, !tbaa [[TBAA3]], !alias.scope !7 +; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x double>, <4 x double>* [[TMP9]], align 8, !tbaa [[TBAA3]], !alias.scope !8 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr 
inbounds double, double* [[TMP4]], i64 12 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast double* [[TMP10]] to <4 x double>* -; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x double>, <4 x double>* [[TMP11]], align 8, !tbaa [[TBAA3]], !alias.scope !7 +; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x double>, <4 x double>* [[TMP11]], align 8, !tbaa [[TBAA3]], !alias.scope !8 ; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <4 x double> [[WIDE_LOAD]], [[TMP0]] ; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <4 x double> [[WIDE_LOAD8]], [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = fmul fast <4 x double> [[WIDE_LOAD9]], [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <4 x double> [[WIDE_LOAD10]], [[TMP3]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = bitcast double* [[TMP16]] to <4 x double>* -; CHECK-NEXT: store <4 x double> [[TMP12]], <4 x double>* [[TMP17]], align 8, !tbaa [[TBAA3]], !alias.scope !10, !noalias !7 +; CHECK-NEXT: store <4 x double> [[TMP12]], <4 x double>* [[TMP17]], align 8, !tbaa [[TBAA3]], !alias.scope !11, !noalias !8 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 4 ; CHECK-NEXT: [[TMP19:%.*]] = bitcast double* [[TMP18]] to <4 x double>* -; CHECK-NEXT: store <4 x double> [[TMP13]], <4 x double>* [[TMP19]], align 8, !tbaa [[TBAA3]], !alias.scope !10, !noalias !7 +; CHECK-NEXT: store <4 x double> [[TMP13]], <4 x double>* [[TMP19]], align 8, !tbaa [[TBAA3]], !alias.scope !11, !noalias !8 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 8 ; CHECK-NEXT: [[TMP21:%.*]] = bitcast double* [[TMP20]] to <4 x double>* -; CHECK-NEXT: store <4 x double> [[TMP14]], <4 x double>* [[TMP21]], align 8, !tbaa [[TBAA3]], !alias.scope !10, !noalias !7 +; CHECK-NEXT: store <4 x double> [[TMP14]], <4 x double>* [[TMP21]], align 8, !tbaa [[TBAA3]], !alias.scope !11, !noalias !8 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 12 ; 
CHECK-NEXT: [[TMP23:%.*]] = bitcast double* [[TMP22]] to <4 x double>* -; CHECK-NEXT: store <4 x double> [[TMP15]], <4 x double>* [[TMP23]], align 8, !tbaa [[TBAA3]], !alias.scope !10, !noalias !7 +; CHECK-NEXT: store <4 x double> [[TMP15]], <4 x double>* [[TMP23]], align 8, !tbaa [[TBAA3]], !alias.scope !11, !noalias !8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] Index: llvm/test/Transforms/PhaseOrdering/deletion-of-loops-that-became-side-effect-free.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/deletion-of-loops-that-became-side-effect-free.ll +++ llvm/test/Transforms/PhaseOrdering/deletion-of-loops-that-became-side-effect-free.ll @@ -115,7 +115,7 @@ ; O1-LABEL: @is_not_empty_variant3( ; O1-NEXT: entry: ; O1-NEXT: [[TOBOOL_NOT4_I:%.*]] = icmp eq %struct.node* [[P:%.*]], null -; O1-NEXT: br i1 [[TOBOOL_NOT4_I]], label [[COUNT_NODES_VARIANT3_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] +; O1-NEXT: br i1 [[TOBOOL_NOT4_I]], label [[COUNT_NODES_VARIANT3_EXIT:%.*]], label [[WHILE_BODY_I:%.*]], !prof !0 ; O1: while.body.i: ; O1-NEXT: [[SIZE_06_I:%.*]] = phi i64 [ [[INC_I:%.*]], [[WHILE_BODY_I]] ], [ 0, [[ENTRY:%.*]] ] ; O1-NEXT: [[P_ADDR_05_I:%.*]] = phi %struct.node* [ [[TMP0:%.*]], [[WHILE_BODY_I]] ], [ [[P]], [[ENTRY]] ] Index: llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll +++ llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll @@ -77,7 +77,7 @@ ; ROTATED_LATER_OLDPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATED_LATER_OLDPM: for.cond.preheader: ; ROTATED_LATER_OLDPM-NEXT: 
[[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATED_LATER_OLDPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; ROTATED_LATER_OLDPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]], !prof !0 ; ROTATED_LATER_OLDPM: for.body.preheader: ; ROTATED_LATER_OLDPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 ; ROTATED_LATER_OLDPM-NEXT: br label [[FOR_BODY:%.*]] @@ -101,7 +101,7 @@ ; ROTATED_LATER_NEWPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATED_LATER_NEWPM: for.cond.preheader: ; ROTATED_LATER_NEWPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATED_LATER_NEWPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; ROTATED_LATER_NEWPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]], !prof !0 ; ROTATED_LATER_NEWPM: for.body.preheader: ; ROTATED_LATER_NEWPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 ; ROTATED_LATER_NEWPM-NEXT: br label [[FOR_BODY:%.*]] @@ -125,7 +125,7 @@ ; ROTATE_OLDPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATE_OLDPM: for.cond.preheader: ; ROTATE_OLDPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATE_OLDPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; ROTATE_OLDPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]], !prof !0 ; ROTATE_OLDPM: for.body.preheader: ; ROTATE_OLDPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 ; ROTATE_OLDPM-NEXT: br label [[FOR_BODY:%.*]] @@ -149,7 +149,7 @@ ; ROTATE_NEWPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATE_NEWPM: for.cond.preheader: ; ROTATE_NEWPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATE_NEWPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; 
ROTATE_NEWPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]], !prof !0 ; ROTATE_NEWPM: for.body.preheader: ; ROTATE_NEWPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 ; ROTATE_NEWPM-NEXT: br label [[FOR_BODY:%.*]]