Index: include/llvm/Analysis/BlockFrequencyInfoImpl.h =================================================================== --- include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -16,6 +16,7 @@ #define LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PostOrderIterator.h" @@ -1155,35 +1156,56 @@ DEBUG(dbgs() << "isIrreducible = true\n"); Distribution Dist; unsigned NumHeadersWithWeight = 0; + Optional<uint64_t> MinHeaderWeight; + DenseSet<uint32_t> HeadersWithoutWeight; + HeadersWithoutWeight.reserve(Loop.NumHeaders); for (uint32_t H = 0; H < Loop.NumHeaders; ++H) { auto &HeaderNode = Loop.Nodes[H]; const BlockT *Block = getBlock(HeaderNode); IsIrrLoopHeader.set(Loop.Nodes[H].Index); Optional<uint64_t> HeaderWeight = Block->getIrrLoopHeaderWeight(); - if (!HeaderWeight) + if (!HeaderWeight) { + DEBUG(dbgs() << "Missing irr loop header metadata on " + << getBlockName(HeaderNode) << "\n"); + HeadersWithoutWeight.insert(H); continue; + } DEBUG(dbgs() << getBlockName(HeaderNode) << " has irr loop header weight " << HeaderWeight.getValue() << "\n"); NumHeadersWithWeight++; uint64_t HeaderWeightValue = HeaderWeight.getValue(); - if (HeaderWeightValue) + if (!MinHeaderWeight || HeaderWeightValue < MinHeaderWeight) + MinHeaderWeight = HeaderWeightValue; + if (HeaderWeightValue) { Dist.addLocal(HeaderNode, HeaderWeightValue); - } - if (NumHeadersWithWeight != Loop.NumHeaders) { - // Not all headers have a weight metadata. Distribute weight evenly. 
- Dist = Distribution(); - for (uint32_t H = 0; H < Loop.NumHeaders; ++H) { - auto &HeaderNode = Loop.Nodes[H]; - Dist.addLocal(HeaderNode, 1); } } + // As a heuristic, if some headers don't have a weight, give them the + // minimum weight seen (not to disrupt the existing trends too much by + // using a weight that's in the general range of the other headers' weights, + // and the minimum seems to perform better than the average.) + // FIXME: better update in the passes that drop the header weight. + // If no headers have a weight, give them even weight (use weight 1). + if (!MinHeaderWeight) + MinHeaderWeight = 1; + for (uint32_t H : HeadersWithoutWeight) { + auto &HeaderNode = Loop.Nodes[H]; + const BlockT *Block = getBlock(HeaderNode); + assert(!Block->getIrrLoopHeaderWeight() && + "Shouldn't have a weight metadata"); + uint64_t MinWeight = MinHeaderWeight.getValue(); + DEBUG(dbgs() << "Giving weight " << MinWeight + << " to " << getBlockName(HeaderNode) << "\n"); + if (MinWeight) + Dist.addLocal(HeaderNode, MinWeight); + } distributeIrrLoopHeaderMass(Dist); for (const BlockNode &M : Loop.Nodes) if (!propagateMassToSuccessors(&Loop, M)) llvm_unreachable("unhandled irreducible control flow"); - if (NumHeadersWithWeight != Loop.NumHeaders) - // Not all headers have a weight metadata. Adjust header mass. + if (NumHeadersWithWeight == 0) + // No headers have weight metadata. Adjust header mass. 
adjustLoopHeaderMass(Loop); } else { Working[Loop.getHeader().Index].getMass() = BlockMass::getFull(); Index: lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -1188,11 +1188,22 @@ } } +static bool isIndirectBrTarget(BasicBlock *BB) { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + if (isa<IndirectBrInst>((*PI)->getTerminator())) + return true; + } + return false; +} + void PGOUseFunc::annotateIrrLoopHeaderWeights() { DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); // Find irr loop headers for (auto &BB : F) { - if (BFI->isIrrLoopHeader(&BB)) { + // As a heuristic, also annotate indirectbr targets as they have a high chance + // to become an irreducible loop header after the indirectbr tail + // duplication. + if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { TerminatorInst *TI = BB.getTerminator(); const UseBBInfo &BBCountInfo = getBBInfo(&BB); setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); Index: test/Analysis/BlockFrequencyInfo/irreducible_pgo.ll =================================================================== --- test/Analysis/BlockFrequencyInfo/irreducible_pgo.ll +++ test/Analysis/BlockFrequencyInfo/irreducible_pgo.ll @@ -159,3 +159,68 @@ ; CHECK-NEXT: - sw.default: {{.*}} count = 0 ; CHECK-NEXT: - exit: {{.*}} count = 1 ; CHECK-NEXT: - indirectgoto: {{.*}} count = 399, irr_loop_header_weight = 400 + +; Missing some irr loop annotations. 
+; Function Attrs: noinline norecurse nounwind uwtable +define i32 @_Z11irreduciblePh2(i8* nocapture readonly %p) !prof !27 { +entry: + %0 = load i32, i32* @tracing, align 4 + %1 = trunc i32 %0 to i8 + %tobool = icmp eq i32 %0, 0 + br label %for.cond1 + +for.cond1: ; preds = %sw.default, %entry + br label %dispatch_op + +dispatch_op: ; preds = %sw.bb6, %for.cond1 +switch i8 %1, label %sw.default [ + i8 0, label %sw.bb + i8 1, label %dispatch_op.sw.bb6_crit_edge + i8 2, label %sw.bb15 + ], !prof !36 + +dispatch_op.sw.bb6_crit_edge: ; preds = %dispatch_op + br label %sw.bb6 + +sw.bb: ; preds = %indirectgoto, %dispatch_op + br label %exit + +TARGET_1: ; preds = %indirectgoto + br label %sw.bb6 + +sw.bb6: ; preds = %TARGET_1, %dispatch_op.sw.bb6_crit_edge + br i1 %tobool, label %dispatch_op, label %if.then, !prof !37 ; Missing !irr_loop !38 + +if.then: ; preds = %sw.bb6 + br label %indirectgoto + +TARGET_2: ; preds = %indirectgoto + br label %sw.bb15 + +sw.bb15: ; preds = %TARGET_2, %dispatch_op + br i1 %tobool, label %if.then18, label %exit, !prof !39, !irr_loop !40 + +if.then18: ; preds = %sw.bb15 + br label %indirectgoto + +unknown_op: ; preds = %indirectgoto + br label %sw.default + +sw.default: ; preds = %unknown_op, %dispatch_op + br label %for.cond1 + +exit: ; preds = %sw.bb15, %sw.bb + ret i32 0 + +indirectgoto: ; preds = %if.then18, %if.then + %idxprom21 = zext i32 %0 to i64 + %arrayidx22 = getelementptr inbounds [256 x i8*], [256 x i8*]* @targets, i64 0, i64 %idxprom21 + %target = load i8*, i8** %arrayidx22, align 8 + indirectbr i8* %target, [label %unknown_op, label %sw.bb, label %TARGET_1, label %TARGET_2], !prof !41, !irr_loop !42 +} + +; CHECK-LABEL: Printing analysis {{.*}} for function '_Z11irreduciblePh2': +; CHECK: block-frequency-info: _Z11irreduciblePh2 +; CHECK: - sw.bb6: {{.*}} count = 100 +; CHECK: - sw.bb15: {{.*}} count = 100, irr_loop_header_weight = 100 +; CHECK: - indirectgoto: {{.*}} count = 400, irr_loop_header_weight = 400 Index: 
test/Transforms/PGOProfile/irreducible.ll =================================================================== --- test/Transforms/PGOProfile/irreducible.ll +++ test/Transforms/PGOProfile/irreducible.ll @@ -91,6 +91,7 @@ TARGET_1: ; preds = %indirectgoto br label %sw.bb6 +; USE: br label %sw.bb6, !irr_loop {{.*}} sw.bb6: ; preds = %TARGET_1, %dispatch_op.sw.bb6_crit_edge br i1 %tobool, label %dispatch_op, label %if.then @@ -102,6 +103,7 @@ TARGET_2: ; preds = %indirectgoto br label %sw.bb15 +; USE: br label %sw.bb15, !irr_loop {{.*}} sw.bb15: ; preds = %TARGET_2, %dispatch_op br i1 %tobool, label %if.then18, label %exit