Index: llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -364,88 +364,77 @@ /// iteration. /// This sets the branch weights for the latch of the recently peeled off loop /// iteration correctly. -/// Our goal is to make sure that: -/// a) The total weight of all the copies of the loop body is preserved. -/// b) The total weight of the loop exit is preserved. -/// c) The body weight is reasonably distributed between the peeled iterations. +/// Let F is a weight of the edge from latch to header. +/// Let E is a weight of the edge from latch to exit. +/// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to +/// go to exit. +/// Then, Estimated TripCount = F / E. +/// For I-th (counting from 0) peeled off iteration we set the the weights for +/// the peeled latch as (TC - I, 1). It gives us reasonable distribution, +/// The probability to go to exit 1/(TC-I) increases. At the same time +/// the estimated trip count of remaining loop reduces by I. +/// To avoid dealing with division rounding we can just multiple both part +/// of weights to E and use weight as (F - I * E, E). /// /// \param Header The copy of the header block that belongs to next iteration. /// \param LatchBR The copy of the latch branch that belongs to this iteration. -/// \param IterNumber The serial number of the iteration that was just -/// peeled off. -/// \param AvgIters The average number of iterations we expect the loop to have. -/// \param[in,out] PeeledHeaderWeight The total number of dynamic loop -/// iterations that are unaccounted for. As an input, it represents the number -/// of times we expect to enter the header of the iteration currently being -/// peeled off. The output is the number of times we expect to enter the -/// header of the next iteration. +/// \param[in,out] FallThroughWeight The weight of the edge from latch to +/// header before peeling (in) and after peeled off one iteration (out). static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - unsigned IterNumber, unsigned AvgIters, - uint64_t &PeeledHeaderWeight) { - if (!PeeledHeaderWeight) + uint64_t ExitWeight, + uint64_t &FallThroughWeight) { + // FallThroughWeight is 0 means that there is no branch weights on original + // latch block or estimated trip count is zero. + if (!FallThroughWeight) return; - // FIXME: Pick a more realistic distribution. - // Currently the proportion of weight we assign to the fall-through - // side of the branch drops linearly with the iteration number, and we use - // a 0.9 fudge factor to make the drop-off less sharp... - uint64_t FallThruWeight = - PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9); - uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight; - PeeledHeaderWeight -= ExitWeight; unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); MDBuilder MDB(LatchBR->getContext()); MDNode *WeightNode = - HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight) - : MDB.createBranchWeights(FallThruWeight, ExitWeight); + HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight) + : MDB.createBranchWeights(FallThroughWeight, ExitWeight); LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); + FallThroughWeight = + FallThroughWeight > ExitWeight ? FallThroughWeight - ExitWeight : 1; } /// Initialize the weights. /// /// \param Header The header block. /// \param LatchBR The latch branch. -/// \param AvgIters The average number of iterations we expect the loop to have. -/// \param[out] ExitWeight The # of times the edge from Latch to Exit is taken. -/// \param[out] CurHeaderWeight The # of times the header is executed. +/// \param[out] ExitWeight The weight of the edge from Latch to Exit. +/// \param[out] FallThroughWeight The weight of the edge from Latch to Header. static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - unsigned AvgIters, uint64_t &ExitWeight, - uint64_t &CurHeaderWeight) { + uint64_t &ExitWeight, + uint64_t &FallThroughWeight) { uint64_t TrueWeight, FalseWeight; if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) return; unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1; ExitWeight = HeaderIdx ? TrueWeight : FalseWeight; - // The # of times the loop body executes is the sum of the exit block - // is taken and the # of times the backedges are taken. - CurHeaderWeight = TrueWeight + FalseWeight; + FallThroughWeight = HeaderIdx ? FalseWeight : TrueWeight; } /// Update the weights of original Latch block after peeling off all iterations. /// /// \param Header The header block. /// \param LatchBR The latch branch. -/// \param ExitWeight The weight of the edge from Latch to Exit block. -/// \param CurHeaderWeight The # of time the header is executed. +/// \param ExitWeight The weight of the edge from Latch to Exit. +/// \param FallThroughWeight The weight of the edge from Latch to Header. static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - uint64_t ExitWeight, uint64_t CurHeaderWeight) { - // Adjust the branch weights on the loop exit. - if (!ExitWeight) + uint64_t ExitWeight, + uint64_t FallThroughWeight) { + // FallThroughWeight is 0 means that there is no branch weights on original + // latch block or estimated trip count is zero. + if (!FallThroughWeight) return; - // The backedge count is the difference of current header weight and - // current loop exit weight. If the current header weight is smaller than - // the current loop exit weight, we mark the loop backedge weight as 1. - uint64_t BackEdgeWeight = 0; - if (ExitWeight < CurHeaderWeight) - BackEdgeWeight = CurHeaderWeight - ExitWeight; - else - BackEdgeWeight = 1; + // Sets the branch weights on the loop exit. MDBuilder MDB(LatchBR->getContext()); unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1; MDNode *WeightNode = - HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) - : MDB.createBranchWeights(BackEdgeWeight, ExitWeight); + HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight) + : MDB.createBranchWeights(FallThroughWeight, ExitWeight); LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); } @@ -659,23 +648,14 @@ // newly created branches. BranchInst *LatchBR = cast(cast(Latch)->getTerminator()); - uint64_t ExitWeight = 0, CurHeaderWeight = 0; - initBranchWeights(Header, LatchBR, PeelCount, ExitWeight, CurHeaderWeight); + uint64_t ExitWeight = 0, FallThroughWeight = 0; + initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight); // For each peeled-off iteration, make a copy of the loop. for (unsigned Iter = 0; Iter < PeelCount; ++Iter) { SmallVector NewBlocks; ValueToValueMapTy VMap; - // Subtract the exit weight from the current header weight -- the exit - // weight is exactly the weight of the previous iteration's header. - // FIXME: due to the way the distribution is constructed, we need a - // guard here to make sure we don't end up with non-positive weights. - if (ExitWeight < CurHeaderWeight) - CurHeaderWeight -= ExitWeight; - else - CurHeaderWeight = 1; - cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks, LoopBlocks, VMap, LVMap, DT, LI); @@ -697,8 +677,7 @@ } auto *LatchBRCopy = cast(VMap[LatchBR]); - updateBranchWeights(InsertBot, LatchBRCopy, Iter, - PeelCount, ExitWeight); + updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight); // Remove Loop metadata from the latch branch instruction // because it is not the Loop's latch branch anymore. LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr); @@ -724,7 +703,7 @@ PHI->setIncomingValueForBlock(NewPreHeader, NewVal); } - fixupBranchWeights(Header, LatchBR, ExitWeight, CurHeaderWeight); + fixupBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight); if (Loop *ParentLoop = L->getParentLoop()) L = ParentLoop; Index: llvm/trunk/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll =================================================================== --- llvm/trunk/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll +++ llvm/trunk/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll @@ -4,17 +4,22 @@ ; Make sure we use the profile information correctly to peel-off 3 iterations ; from the loop, and update the branch weights for the peeled loop properly. +; All side exits to deopt does not change weigths. ; CHECK: Loop Unroll: F[basic] ; CHECK: PEELING loop %for.body with iteration count 3! ; CHECK-LABEL: @basic +; CHECK: br i1 %c, label %{{.*}}, label %side_exit, !prof !15 ; CHECK: br i1 %{{.*}}, label %[[NEXT0:.*]], label %for.cond.for.end_crit_edge, !prof !16 ; CHECK: [[NEXT0]]: +; CHECK: br i1 %c, label %{{.*}}, label %side_exit, !prof !15 ; CHECK: br i1 %{{.*}}, label %[[NEXT1:.*]], label %for.cond.for.end_crit_edge, !prof !17 ; CHECK: [[NEXT1]]: +; CHECK: br i1 %c, label %{{.*}}, label %side_exit, !prof !15 ; CHECK: br i1 %{{.*}}, label %[[NEXT2:.*]], label %for.cond.for.end_crit_edge, !prof !18 ; CHECK: [[NEXT2]]: +; CHECK: br i1 %c, label %{{.*}}, label %side_exit.loopexit, !prof !15 ; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !19 define i32 @basic(i32* %p, i32 %k, i1 %c) #0 !prof !15 { @@ -74,8 +79,11 @@ !16 = !{!"branch_weights", i32 3001, i32 1001} !17 = !{!"branch_weights", i32 1, i32 0} -;CHECK: !16 = !{!"branch_weights", i32 900, i32 101} -;CHECK: !17 = !{!"branch_weights", i32 540, i32 360} -;CHECK: !18 = !{!"branch_weights", i32 162, i32 378} -;CHECK: !19 = !{!"branch_weights", i32 1399, i32 162} +; This is a weights of deopt side-exit. +;CHECK: !15 = !{!"branch_weights", i32 1, i32 0} +; This is a weights of latch and its copies. +;CHECK: !16 = !{!"branch_weights", i32 3001, i32 1001} +;CHECK: !17 = !{!"branch_weights", i32 2000, i32 1001} +;CHECK: !18 = !{!"branch_weights", i32 999, i32 1001} +;CHECK: !19 = !{!"branch_weights", i32 1, i32 1001} Index: llvm/trunk/test/Transforms/LoopUnroll/peel-loop-pgo.ll =================================================================== --- llvm/trunk/test/Transforms/LoopUnroll/peel-loop-pgo.ll +++ llvm/trunk/test/Transforms/LoopUnroll/peel-loop-pgo.ll @@ -103,8 +103,8 @@ !15 = !{!"function_entry_count", i64 1} !16 = !{!"branch_weights", i32 3001, i32 1001} -;CHECK: !15 = !{!"branch_weights", i32 900, i32 101} -;CHECK: !16 = !{!"branch_weights", i32 540, i32 360} -;CHECK: !17 = !{!"branch_weights", i32 162, i32 378} -;CHECK: !18 = !{!"branch_weights", i32 1399, i32 162} +;CHECK: !15 = !{!"branch_weights", i32 3001, i32 1001} +;CHECK: !16 = !{!"branch_weights", i32 2000, i32 1001} +;CHECK: !17 = !{!"branch_weights", i32 999, i32 1001} +;CHECK: !18 = !{!"branch_weights", i32 1, i32 1001}