diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -66,7 +66,7 @@ #define DEBUG_TYPE "loop-fusion" -STATISTIC(FuseCounter, "Count number of loop fusions performed"); +STATISTIC(FuseCounter, "Loops fused"); STATISTIC(NumFusionCandidates, "Number of candidates for loop fusion"); STATISTIC(InvalidPreheader, "Loop has invalid preheader"); STATISTIC(InvalidHeader, "Loop has invalid header"); @@ -79,12 +79,12 @@ STATISTIC(ContainsVolatileAccess, "Loop contains a volatile access"); STATISTIC(NotSimplifiedForm, "Loop is not in simplified form"); STATISTIC(InvalidDependencies, "Dependencies prevent fusion"); -STATISTIC(InvalidTripCount, - "Loop does not have invariant backedge taken count"); +STATISTIC(InvalidTripCount, "Loop has unknown trip count"); STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop"); -STATISTIC(NonEqualTripCount, "Candidate trip counts are not the same"); -STATISTIC(NonAdjacent, "Candidates are not adjacent"); -STATISTIC(NonEmptyPreheader, "Candidate has a non-empty preheader"); +STATISTIC(NonEqualTripCount, "Loop trip counts are not the same"); +STATISTIC(NonAdjacent, "Loops are not adjacent"); +STATISTIC(NonEmptyPreheader, "Loop has a non-empty preheader"); +STATISTIC(FusionNotBeneficial, "Fusion is not beneficial"); enum FusionDependenceAnalysisChoice { FUSION_DEPENDENCE_ANALYSIS_SCEV, @@ -151,11 +151,14 @@ const DominatorTree *DT; const PostDominatorTree *PDT; + OptimizationRemarkEmitter &ORE; + FusionCandidate(Loop *L, const DominatorTree *DT, - const PostDominatorTree *PDT) + const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE) : Preheader(L->getLoopPreheader()), Header(L->getHeader()), ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()), - Latch(L->getLoopLatch()), L(L), Valid(true), DT(DT), PDT(PDT) { + Latch(L->getLoopLatch()), L(L), Valid(true), DT(DT), PDT(PDT), 
+ ORE(ORE) { // Walk over all blocks in the loop and check for conditions that may // prevent fusion. For each block, walk over all instructions and collect @@ -163,28 +166,28 @@ // found, invalidate this object and return. for (BasicBlock *BB : L->blocks()) { if (BB->hasAddressTaken()) { - AddressTakenBB++; invalidate(); + reportInvalidCandidate(AddressTakenBB); return; } for (Instruction &I : *BB) { if (I.mayThrow()) { - MayThrowException++; invalidate(); + reportInvalidCandidate(MayThrowException); return; } if (StoreInst *SI = dyn_cast(&I)) { if (SI->isVolatile()) { - ContainsVolatileAccess++; invalidate(); + reportInvalidCandidate(ContainsVolatileAccess); return; } } if (LoadInst *LI = dyn_cast(&I)) { if (LI->isVolatile()) { - ContainsVolatileAccess++; invalidate(); + reportInvalidCandidate(ContainsVolatileAccess); return; } } @@ -227,6 +230,44 @@ } #endif + /// Determine if a fusion candidate (representing a loop) is eligible for + /// fusion. Note that this only checks whether a single loop can be fused - it + /// does not check whether it is *legal* to fuse two loops together. + bool isEligibleForFusion(ScalarEvolution &SE) const { + if (!isValid()) { + LLVM_DEBUG(dbgs() << "FC has invalid CFG requirements!\n"); + if (!Preheader) + ++InvalidPreheader; + if (!Header) + ++InvalidHeader; + if (!ExitingBlock) + ++InvalidExitingBlock; + if (!ExitBlock) + ++InvalidExitBlock; + if (!Latch) + ++InvalidLatch; + if (L->isInvalid()) + ++InvalidLoop; + + return false; + } + + // Require ScalarEvolution to be able to determine a trip count. 
+ if (!SE.hasLoopInvariantBackedgeTakenCount(L)) { + LLVM_DEBUG(dbgs() << "Loop " << L->getName() + << " trip count not computable!\n"); + return reportInvalidCandidate(InvalidTripCount); + } + + if (!L->isLoopSimplifyForm()) { + LLVM_DEBUG(dbgs() << "Loop " << L->getName() + << " is not in simplified form!\n"); + return reportInvalidCandidate(NotSimplifiedForm); + } + + return true; + } + private: // This is only used internally for now, to clear the MemWrites and MemReads // list and setting Valid to false. I can't envision other uses of this right @@ -239,6 +280,17 @@ MemReads.clear(); Valid = false; } + + bool reportInvalidCandidate(llvm::Statistic &Stat) const { + using namespace ore; + assert(L && Preheader && "Fusion candidate not initialized properly!"); + ++Stat; + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, Stat.getName(), + L->getStartLoc(), Preheader) + << "[" << Preheader->getParent()->getName() << "]: " + << "Loop is not a candidate for fusion: " << Stat.getDesc()); + return false; + } }; inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, @@ -391,16 +443,6 @@ } #endif -static void reportLoopFusion(const FusionCandidate &FC0, - const FusionCandidate &FC1, - OptimizationRemarkEmitter &ORE) { - using namespace ore; - ORE.emit( - OptimizationRemark(DEBUG_TYPE, "LoopFusion", FC0.Preheader->getParent()) - << "Fused " << NV("Cand1", StringRef(FC0.Preheader->getName())) - << " with " << NV("Cand2", StringRef(FC1.Preheader->getName()))); -} - struct LoopFuser { private: // Sets of control flow equivalent fusion candidates for a given nest level. @@ -506,53 +548,13 @@ return false; } - /// Determine if a fusion candidate (representing a loop) is eligible for - /// fusion. Note that this only checks whether a single loop can be fused - it - /// does not check whether it is *legal* to fuse two loops together. 
- bool eligibleForFusion(const FusionCandidate &FC) const { - if (!FC.isValid()) { - LLVM_DEBUG(dbgs() << "FC " << FC << " has invalid CFG requirements!\n"); - if (!FC.Preheader) - InvalidPreheader++; - if (!FC.Header) - InvalidHeader++; - if (!FC.ExitingBlock) - InvalidExitingBlock++; - if (!FC.ExitBlock) - InvalidExitBlock++; - if (!FC.Latch) - InvalidLatch++; - if (FC.L->isInvalid()) - InvalidLoop++; - - return false; - } - - // Require ScalarEvolution to be able to determine a trip count. - if (!SE.hasLoopInvariantBackedgeTakenCount(FC.L)) { - LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName() - << " trip count not computable!\n"); - InvalidTripCount++; - return false; - } - - if (!FC.L->isLoopSimplifyForm()) { - LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName() - << " is not in simplified form!\n"); - NotSimplifiedForm++; - return false; - } - - return true; - } - /// Iterate over all loops in the given loop set and identify the loops that /// are eligible for fusion. Place all eligible fusion candidates into Control /// Flow Equivalent sets, sorted by dominance. void collectFusionCandidates(const LoopVector &LV) { for (Loop *L : LV) { - FusionCandidate CurrCand(L, &DT, &PDT); - if (!eligibleForFusion(CurrCand)) + FusionCandidate CurrCand(L, &DT, &PDT, ORE); + if (!CurrCand.isEligibleForFusion(SE)) continue; // Go through each list in FusionCandidates and determine if L is control @@ -664,14 +666,15 @@ if (!identicalTripCounts(*FC0, *FC1)) { LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip " "counts. Not fusing.\n"); - NonEqualTripCount++; + reportLoopFusion(*FC0, *FC1, + NonEqualTripCount); continue; } if (!isAdjacent(*FC0, *FC1)) { LLVM_DEBUG(dbgs() << "Fusion candidates are not adjacent. Not fusing.\n"); - NonAdjacent++; + reportLoopFusion(*FC0, *FC1, NonAdjacent); continue; } @@ -683,12 +686,15 @@ if (!isEmptyPreheader(*FC1)) { LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty " "preheader. 
Not fusing.\n"); - NonEmptyPreheader++; + reportLoopFusion(*FC0, *FC1, + NonEmptyPreheader); continue; } if (!dependencesAllowFusion(*FC0, *FC1)) { LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n"); + reportLoopFusion(*FC0, *FC1, + InvalidDependencies); continue; } @@ -696,9 +702,11 @@ LLVM_DEBUG(dbgs() << "\tFusion appears to be " << (BeneficialToFuse ? "" : "un") << "profitable!\n"); - if (!BeneficialToFuse) + if (!BeneficialToFuse) { + reportLoopFusion(*FC0, *FC1, + FusionNotBeneficial); continue; - + } // All analysis has completed and has determined that fusion is legal // and profitable. At this point, start transforming the code and // perform fusion. @@ -710,15 +718,14 @@ // Note this needs to be done *before* performFusion because // performFusion will change the original loops, making it not // possible to identify them after fusion is complete. - reportLoopFusion(*FC0, *FC1, ORE); + reportLoopFusion(*FC0, *FC1, FuseCounter); - FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT); + FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT, ORE); FusedCand.verify(); - assert(eligibleForFusion(FusedCand) && + assert(FusedCand.isEligibleForFusion(SE) && "Fused candidate should be eligible for fusion!"); // Notify the loop-depth-tree that these loops are not valid objects - // anymore. LDT.removeLoop(FC1->L); CandidateSet.erase(FC0); @@ -1137,6 +1144,33 @@ return FC0.L; } + + /// Report details on loop fusion opportunities. + /// + /// This template function can be used to report both successful and missed + /// loop fusion opportunities, based on the RemarkKind. The RemarkKind should + /// be one of: + /// - OptimizationRemarkMissed to report when loop fusion is unsuccessful + /// given two valid fusion candidates. + /// - OptimizationRemark to report successful fusion of two fusion + /// candidates. 
+ /// The remarks will be printed using the form: + /// <path/filename>:<line number>:<column number>: [<function name>]: + /// <Cand1 Preheader> and <Cand2 Preheader>: <Stat Description> + template + void reportLoopFusion(const FusionCandidate &FC0, const FusionCandidate &FC1, + llvm::Statistic &Stat) { + assert(FC0.Preheader && FC1.Preheader && + "Expectingf valid fusion candidates"); + using namespace ore; + ++Stat; + ORE.emit(RemarkKind(DEBUG_TYPE, Stat.getName(), FC0.L->getStartLoc(), + FC0.Preheader) + << "[" << FC0.Preheader->getParent()->getName() + << "]: " << NV("Cand1", StringRef(FC0.Preheader->getName())) + << " and " << NV("Cand2", StringRef(FC1.Preheader->getName())) + << ": " << Stat.getDesc()); + } }; struct LoopFuseLegacy : public FunctionPass { diff --git a/llvm/test/Transforms/LoopFusion/diagnostics_analysis.ll b/llvm/test/Transforms/LoopFusion/diagnostics_analysis.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopFusion/diagnostics_analysis.ll @@ -0,0 +1,130 @@ +; RUN: opt -S -loop-fusion -pass-remarks-analysis=loop-fusion -disable-output < %s 2>&1 | FileCheck %s + +@B = common global [1024 x i32] zeroinitializer, align 16 + +; CHECK: remark: diagnostics_analysis.c:6:3: [test]: Loop is not a candidate for fusion: Loop contains a volatile access +; CHECK: remark: diagnostics_analysis.c:10:3: [test]: Loop is not a candidate for fusion: Loop has unknown trip count +define void @test(i32* %A, i32 %n) !dbg !15 { +entry: + %A.addr = alloca i32*, align 8 + %n.addr = alloca i32, align 4 + %i = alloca i32, align 4 + %i1 = alloca i32, align 4 + store i32* %A, i32** %A.addr, align 8 + store i32 %n, i32* %n.addr, align 4 + %0 = bitcast i32* %i to i8* + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %1 = load i32, i32* %i, align 4 + %2 = load i32, i32* %n.addr, align 4 + %cmp = icmp slt i32 %1, %2 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + %3 = bitcast i32* %i to i8*, !dbg !42 + br label %for.end + +for.body: ; preds = %for.cond + %4 = load i32, i32*
%i, align 4 + %sub = sub nsw i32 %4, 3 + %5 = load i32, i32* %i, align 4 + %add = add nsw i32 %5, 3 + %mul = mul nsw i32 %sub, %add + %6 = load i32, i32* %i, align 4 + %rem = srem i32 %mul, %6 + %7 = load i32*, i32** %A.addr, align 8 + %8 = load i32, i32* %i, align 4 + %idxprom = sext i32 %8 to i64 + %arrayidx = getelementptr inbounds i32, i32* %7, i64 %idxprom + store volatile i32 %rem, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %9 = load i32, i32* %i, align 4, !dbg !49 + %inc = add nsw i32 %9, 1, !dbg !49 + store i32 %inc, i32* %i, align 4, !dbg !49 + br label %for.cond, !dbg !42, !llvm.loop !50 + +for.end: ; preds = %for.cond.cleanup + %10 = bitcast i32* %i1 to i8* + store i32 0, i32* %i1, align 4 + br label %for.cond2 + +for.cond2: ; preds = %for.inc12, %for.end + %11 = load i32, i32* %i1, align 4 + %12 = load i32, i32* %n.addr, align 4 + %cmp3 = icmp slt i32 %11, %12 + br i1 %cmp3, label %for.body5, label %for.cond.cleanup4 + +for.cond.cleanup4: ; preds = %for.cond2 + %13 = bitcast i32* %i1 to i8* + br label %for.end14 + +for.body5: ; preds = %for.cond2 + %14 = load i32, i32* %i1, align 4 + %sub6 = sub nsw i32 %14, 3 + %15 = load i32, i32* %i1, align 4 + %add7 = add nsw i32 %15, 3 + %mul8 = mul nsw i32 %sub6, %add7 + %16 = load i32, i32* %i1, align 4 + %rem9 = srem i32 %mul8, %16 + %17 = load i32, i32* %i1, align 4 + %idxprom10 = sext i32 %17 to i64 + %arrayidx11 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %idxprom10 + store i32 %rem9, i32* %arrayidx11, align 4 + br label %for.inc12 + +for.inc12: ; preds = %for.body5 + %18 = load i32, i32* %i1, align 4 + %inc13 = add nsw i32 %18, 1 + store i32 %inc13, i32* %i1, align 4 + br label %for.cond2, !dbg !59, !llvm.loop !67 + +for.end14: ; preds = %for.cond.cleanup4 + ret void +} + +!llvm.module.flags = !{!10, !11, !13} +!llvm.ident = !{!14} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "B", scope: 
!2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 9.0.0 (git@github.ibm.com:compiler/llvm-project.git c019c32c5a2b0ed4487a738337d35fd3f630ac0a)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, nameTableKind: GNU) +!3 = !DIFile(filename: "diagnostics_analysis.c", directory: "/tmp") +!4 = !{} +!5 = !{!0} +!6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 32768, elements: !8) +!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!8 = !{!9} +!9 = !DISubrange(count: 1024) +!10 = !{i32 2, !"Dwarf Version", i32 4} +!11 = !{i32 2, !"Debug Info Version", i32 3} +!13 = !{i32 7, !"PIC Level", i32 2} +!14 = !{!"clang version 9.0.0 (git@github.ibm.com:compiler/llvm-project.git c019c32c5a2b0ed4487a738337d35fd3f630ac0a)"} +!15 = distinct !DISubprogram(name: "test", scope: !3, file: !3, line: 5, type: !16, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !20) +!16 = !DISubroutineType(types: !17) +!17 = !{null, !18, !7} +!18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !19, size: 64) +!19 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !7) +!20 = !{!21, !22, !23, !25} +!21 = !DILocalVariable(name: "A", arg: 1, scope: !15, file: !3, line: 5, type: !18) +!22 = !DILocalVariable(name: "n", arg: 2, scope: !15, file: !3, line: 5, type: !7) +!23 = !DILocalVariable(name: "i", scope: !24, file: !3, line: 6, type: !7) +!24 = distinct !DILexicalBlock(scope: !15, file: !3, line: 6, column: 3) +!25 = !DILocalVariable(name: "i", scope: !26, file: !3, line: 10, type: !7) +!26 = distinct !DILexicalBlock(scope: !15, file: !3, line: 10, column: 3) +!38 = distinct !DILexicalBlock(scope: !24, file: !3, line: 6, column: 3) +!41 = !DILocation(line: 6, column: 3, scope: !24) +!42 = !DILocation(line: 6, column: 3, scope: !38) 
+!44 = distinct !DILexicalBlock(scope: !38, file: !3, line: 6, column: 31) +!49 = !DILocation(line: 6, column: 27, scope: !38) +!50 = distinct !{!50, !41, !51} +!51 = !DILocation(line: 8, column: 3, scope: !24) +!55 = distinct !DILexicalBlock(scope: !26, file: !3, line: 10, column: 3) +!58 = !DILocation(line: 10, column: 3, scope: !26) +!59 = !DILocation(line: 10, column: 3, scope: !55) +!67 = distinct !{!67, !58, !68} +!68 = !DILocation(line: 12, column: 3, scope: !26) +!69 = !DILocation(line: 13, column: 1, scope: !15) diff --git a/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll b/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll @@ -0,0 +1,301 @@ +; RUN: opt -S -loop-fusion -pass-remarks-missed=loop-fusion -disable-output < %s 2>&1 | FileCheck %s +; +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +@B = common global [1024 x i32] zeroinitializer, align 16, !dbg !0 + +; CHECK: remark: diagnostics_missed.c:18:3: [non_adjacent]: entry and for.end: Loops are not adjacent +define void @non_adjacent(i32* noalias %A) !dbg !67 { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond1 = icmp ne i64 %i.0, 100 + br i1 %exitcond1, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + br label %for.end + +for.body: ; preds = %for.cond + %sub = add nsw i64 %i.0, -3 + %add = add nuw nsw i64 %i.0, 3 + %mul = mul nsw i64 %sub, %add + %rem = srem i64 %mul, %i.0 + %conv = trunc i64 %rem to i32 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.0 + store i32 %conv, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nuw nsw i64 %i.0, 1, !dbg !86 + br label %for.cond, !dbg !87, !llvm.loop !88 + +for.end: ; preds = %for.cond.cleanup + br label %for.cond2 + +for.cond2: ; preds = %for.inc13, %for.end + %i1.0 = phi i64 [ 
0, %for.end ], [ %inc14, %for.inc13 ] + %exitcond = icmp ne i64 %i1.0, 100 + br i1 %exitcond, label %for.body6, label %for.cond.cleanup5 + +for.cond.cleanup5: ; preds = %for.cond2 + br label %for.end15 + +for.body6: ; preds = %for.cond2 + %sub7 = add nsw i64 %i1.0, -3 + %add8 = add nuw nsw i64 %i1.0, 3 + %mul9 = mul nsw i64 %sub7, %add8 + %rem10 = srem i64 %mul9, %i1.0 + %conv11 = trunc i64 %rem10 to i32 + %arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.0 + store i32 %conv11, i32* %arrayidx12, align 4 + br label %for.inc13 + +for.inc13: ; preds = %for.body6 + %inc14 = add nuw nsw i64 %i1.0, 1, !dbg !100 + br label %for.cond2, !dbg !101, !llvm.loop !102 + +for.end15: ; preds = %for.cond.cleanup5 + ret void +} + + +; CHECK: remark: diagnostics_missed.c:28:3: [different_bounds]: entry and for.end: Loop trip counts are not the same +define void @different_bounds(i32* noalias %A) !dbg !105 { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond1 = icmp ne i64 %i.0, 100 + br i1 %exitcond1, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + br label %for.end + +for.body: ; preds = %for.cond + %sub = add nsw i64 %i.0, -3 + %add = add nuw nsw i64 %i.0, 3 + %mul = mul nsw i64 %sub, %add + %rem = srem i64 %mul, %i.0 + %conv = trunc i64 %rem to i32 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.0 + store i32 %conv, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nuw nsw i64 %i.0, 1, !dbg !123 + br label %for.cond, !dbg !124, !llvm.loop !125 + +for.end: ; preds = %for.cond.cleanup + br label %for.cond2 + +for.cond2: ; preds = %for.inc13, %for.end + %i1.0 = phi i64 [ 0, %for.end ], [ %inc14, %for.inc13 ] + %exitcond = icmp ne i64 %i1.0, 200 + br i1 %exitcond, label %for.body6, label %for.cond.cleanup5 + +for.cond.cleanup5: ; preds = %for.cond2 + br label %for.end15 + +for.body6: ; 
preds = %for.cond2 + %sub7 = add nsw i64 %i1.0, -3 + %add8 = add nuw nsw i64 %i1.0, 3 + %mul9 = mul nsw i64 %sub7, %add8 + %rem10 = srem i64 %mul9, %i1.0 + %conv11 = trunc i64 %rem10 to i32 + %arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.0 + store i32 %conv11, i32* %arrayidx12, align 4 + br label %for.inc13 + +for.inc13: ; preds = %for.body6 + %inc14 = add nuw nsw i64 %i1.0, 1 + br label %for.cond2, !dbg !138, !llvm.loop !139 + +for.end15: ; preds = %for.cond.cleanup5 + ret void +} + +; CHECK: remark: diagnostics_missed.c:38:3: [negative_dependence]: entry and for.end: Loop has a non-empty preheader +define void @negative_dependence(i32* noalias %A) !dbg !142 { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc ], [ 0, %entry ] + %exitcond3 = icmp ne i64 %indvars.iv1, 100 + br i1 %exitcond3, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1 + %tmp = trunc i64 %indvars.iv1 to i32 + store i32 %tmp, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + br label %for.cond, !dbg !160, !llvm.loop !161 + +for.end: ; preds = %for.cond + call void @llvm.dbg.value(metadata i32 0, metadata !147, metadata !DIExpression()), !dbg !163 + br label %for.cond2, !dbg !164 + +for.cond2: ; preds = %for.inc10, %for.end + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc10 ], [ 0, %for.end ] + %exitcond = icmp ne i64 %indvars.iv, 100 + br i1 %exitcond, label %for.body5, label %for.end12 + +for.body5: ; preds = %for.cond2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next + %tmp4 = load i32, i32* %arrayidx7, align 4 + %mul = shl nsw i32 %tmp4, 1 + %arrayidx9 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv + store i32 
%mul, i32* %arrayidx9, align 4 + br label %for.inc10 + +for.inc10: ; preds = %for.body5 + br label %for.cond2 + +for.end12: ; preds = %for.cond2 + ret void, !dbg !178 +} + +; CHECK: remark: diagnostics_missed.c:51:3: [sumTest]: entry and for.cond2.preheader: Dependencies prevent fusion +define i32 @sumTest(i32* noalias %A) !dbg !179 { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc ], [ 0, %entry ] + %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ] + %exitcond3 = icmp ne i64 %indvars.iv1, 100 + br i1 %exitcond3, label %for.body, label %for.cond2 + +for.body: ; preds = %for.cond + br label %for.inc + +for.inc: ; preds = %for.body + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1 + %tmp = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %sum.0, %tmp + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + br label %for.cond, !dbg !199, !llvm.loop !200 + +for.cond2: ; preds = %for.inc10, %for.cond + %sum.0.lcssa = phi i32 [ %sum.0, %for.cond ], [ %sum.0.lcssa, %for.inc10 ] + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc10 ], [ 0, %for.cond ] + %exitcond = icmp ne i64 %indvars.iv, 100 + br i1 %exitcond, label %for.body5, label %for.end12 + +for.body5: ; preds = %for.cond2 + %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp4 = load i32, i32* %arrayidx7, align 4 + %div = sdiv i32 %tmp4, %sum.0.lcssa + %arrayidx9 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv + store i32 %div, i32* %arrayidx9, align 4 + br label %for.inc10 + +for.inc10: ; preds = %for.body5 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond2 + +for.end12: ; preds = %for.cond2 + ret i32 %sum.0.lcssa, !dbg !215 +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!11, !12, !13, !14} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct
!DIGlobalVariable(name: "B", scope: !2, file: !6, line: 46, type: !7, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 9.0.0 (git@github.ibm.com:compiler/llvm-project.git 23c4baaa9f5b33d2d52eda981d376c6b0a7a3180)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, nameTableKind: GNU) +!3 = !DIFile(filename: "diagnostics_missed.c", directory: "/tmp") +!4 = !{} +!5 = !{!0} +!6 = !DIFile(filename: "diagnostics_missed.c", directory: "/tmp") +!7 = !DICompositeType(tag: DW_TAG_array_type, baseType: !8, size: 32768, elements: !9) +!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!9 = !{!10} +!10 = !DISubrange(count: 1024) +!11 = !{i32 2, !"Dwarf Version", i32 4} +!12 = !{i32 2, !"Debug Info Version", i32 3} +!13 = !{i32 1, !"wchar_size", i32 4} +!14 = !{i32 7, !"PIC Level", i32 2} +!17 = !DISubroutineType(types: !18) +!18 = !{null, !19} +!19 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !20) +!20 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64) +!67 = distinct !DISubprogram(name: "non_adjacent", scope: !6, file: !6, line: 17, type: !17, scopeLine: 17, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !68) +!68 = !{!69, !70, !73} +!69 = !DILocalVariable(name: "A", arg: 1, scope: !67, file: !6, line: 17, type: !19) +!70 = !DILocalVariable(name: "i", scope: !71, file: !6, line: 18, type: !72) +!71 = distinct !DILexicalBlock(scope: !67, file: !6, line: 18, column: 3) +!72 = !DIBasicType(name: "long int", size: 64, encoding: DW_ATE_signed) +!73 = !DILocalVariable(name: "i", scope: !74, file: !6, line: 22, type: !72) +!74 = distinct !DILexicalBlock(scope: !67, file: !6, line: 22, column: 3) +!79 = distinct !DILexicalBlock(scope: !71, file: !6, line: 18, column: 3) +!80 = !DILocation(line: 18, column: 3, scope: !71) +!86 = !DILocation(line: 18, column: 
30, scope: !79) +!87 = !DILocation(line: 18, column: 3, scope: !79) +!88 = distinct !{!88, !80, !89} +!89 = !DILocation(line: 20, column: 3, scope: !71) +!93 = distinct !DILexicalBlock(scope: !74, file: !6, line: 22, column: 3) +!94 = !DILocation(line: 22, column: 3, scope: !74) +!100 = !DILocation(line: 22, column: 30, scope: !93) +!101 = !DILocation(line: 22, column: 3, scope: !93) +!102 = distinct !{!102, !94, !103} +!103 = !DILocation(line: 24, column: 3, scope: !74) +!105 = distinct !DISubprogram(name: "different_bounds", scope: !6, file: !6, line: 27, type: !17, scopeLine: 27, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !106) +!106 = !{!107, !108, !110} +!107 = !DILocalVariable(name: "A", arg: 1, scope: !105, file: !6, line: 27, type: !19) +!108 = !DILocalVariable(name: "i", scope: !109, file: !6, line: 28, type: !72) +!109 = distinct !DILexicalBlock(scope: !105, file: !6, line: 28, column: 3) +!110 = !DILocalVariable(name: "i", scope: !111, file: !6, line: 32, type: !72) +!111 = distinct !DILexicalBlock(scope: !105, file: !6, line: 32, column: 3) +!116 = distinct !DILexicalBlock(scope: !109, file: !6, line: 28, column: 3) +!117 = !DILocation(line: 28, column: 3, scope: !109) +!123 = !DILocation(line: 28, column: 30, scope: !116) +!124 = !DILocation(line: 28, column: 3, scope: !116) +!125 = distinct !{!125, !117, !126} +!126 = !DILocation(line: 30, column: 3, scope: !109) +!130 = distinct !DILexicalBlock(scope: !111, file: !6, line: 32, column: 3) +!131 = !DILocation(line: 32, column: 3, scope: !111) +!138 = !DILocation(line: 32, column: 3, scope: !130) +!139 = distinct !{!139, !131, !140} +!140 = !DILocation(line: 34, column: 3, scope: !111) +!142 = distinct !DISubprogram(name: "negative_dependence", scope: !6, file: !6, line: 37, type: !17, scopeLine: 37, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: 
!143) +!143 = !{!144, !145, !147} +!144 = !DILocalVariable(name: "A", arg: 1, scope: !142, file: !6, line: 37, type: !19) +!145 = !DILocalVariable(name: "i", scope: !146, file: !6, line: 38, type: !8) +!146 = distinct !DILexicalBlock(scope: !142, file: !6, line: 38, column: 3) +!147 = !DILocalVariable(name: "i", scope: !148, file: !6, line: 42, type: !8) +!148 = distinct !DILexicalBlock(scope: !142, file: !6, line: 42, column: 3) +!153 = distinct !DILexicalBlock(scope: !146, file: !6, line: 38, column: 3) +!154 = !DILocation(line: 38, column: 3, scope: !146) +!160 = !DILocation(line: 38, column: 3, scope: !153) +!161 = distinct !{!161, !154, !162} +!162 = !DILocation(line: 40, column: 3, scope: !146) +!163 = !DILocation(line: 0, scope: !148) +!164 = !DILocation(line: 42, column: 8, scope: !148) +!178 = !DILocation(line: 45, column: 1, scope: !142) +!179 = distinct !DISubprogram(name: "sumTest", scope: !6, file: !6, line: 48, type: !180, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !182) +!180 = !DISubroutineType(types: !181) +!181 = !{!8, !19} +!182 = !{!183, !184, !185, !187} +!183 = !DILocalVariable(name: "A", arg: 1, scope: !179, file: !6, line: 48, type: !19) +!184 = !DILocalVariable(name: "sum", scope: !179, file: !6, line: 49, type: !8) +!185 = !DILocalVariable(name: "i", scope: !186, file: !6, line: 51, type: !8) +!186 = distinct !DILexicalBlock(scope: !179, file: !6, line: 51, column: 3) +!187 = !DILocalVariable(name: "i", scope: !188, file: !6, line: 54, type: !8) +!188 = distinct !DILexicalBlock(scope: !179, file: !6, line: 54, column: 3) +!193 = distinct !DILexicalBlock(scope: !186, file: !6, line: 51, column: 3) +!194 = !DILocation(line: 51, column: 3, scope: !186) +!199 = !DILocation(line: 51, column: 3, scope: !193) +!200 = distinct !{!200, !194, !201} +!201 = !DILocation(line: 52, column: 15, scope: !186) +!215 = !DILocation(line: 57, column: 3, scope: !179)