diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -79,12 +79,12 @@ STATISTIC(ContainsVolatileAccess, "Loop contains a volatile access"); STATISTIC(NotSimplifiedForm, "Loop is not in simplified form"); STATISTIC(InvalidDependencies, "Dependencies prevent fusion"); -STATISTIC(InvalidTripCount, - "Loop does not have invariant backedge taken count"); +STATISTIC(InvalidTripCount, "Loop has unknown trip count"); STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop"); -STATISTIC(NonEqualTripCount, "Candidate trip counts are not the same"); -STATISTIC(NonAdjacent, "Candidates are not adjacent"); -STATISTIC(NonEmptyPreheader, "Candidate has a non-empty preheader"); +STATISTIC(NonEqualTripCount, "Loop trip counts are not the same"); +STATISTIC(NonAdjacent, "Loops are not adjacent"); +STATISTIC(NonEmptyPreheader, "Loop has a non-empty preheader"); +STATISTIC(FusionNotBeneficial, "Fusion is not beneficial"); enum FusionDependenceAnalysisChoice { FUSION_DEPENDENCE_ANALYSIS_SCEV, @@ -151,11 +151,14 @@ const DominatorTree *DT; const PostDominatorTree *PDT; + OptimizationRemarkEmitter &ORE; + FusionCandidate(Loop *L, const DominatorTree *DT, - const PostDominatorTree *PDT) + const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE) : Preheader(L->getLoopPreheader()), Header(L->getHeader()), ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()), - Latch(L->getLoopLatch()), L(L), Valid(true), DT(DT), PDT(PDT) { + Latch(L->getLoopLatch()), L(L), Valid(true), DT(DT), PDT(PDT), + ORE(ORE) { // Walk over all blocks in the loop and check for conditions that may // prevent fusion. For each block, walk over all instructions and collect @@ -163,28 +166,28 @@ // found, invalidate this object and return. 
for (BasicBlock *BB : L->blocks()) { if (BB->hasAddressTaken()) { - AddressTakenBB++; invalidate(); + reportInvalidCandidate(AddressTakenBB); return; } for (Instruction &I : *BB) { if (I.mayThrow()) { - MayThrowException++; invalidate(); + reportInvalidCandidate(MayThrowException); return; } if (StoreInst *SI = dyn_cast(&I)) { if (SI->isVolatile()) { - ContainsVolatileAccess++; invalidate(); + reportInvalidCandidate(ContainsVolatileAccess); return; } } if (LoadInst *LI = dyn_cast(&I)) { if (LI->isVolatile()) { - ContainsVolatileAccess++; invalidate(); + reportInvalidCandidate(ContainsVolatileAccess); return; } } @@ -227,6 +230,44 @@ } #endif + /// Determine if a fusion candidate (representing a loop) is eligible for + /// fusion. Note that this only checks whether a single loop can be fused - it + /// does not check whether it is *legal* to fuse two loops together. + bool eligibleForFusion(ScalarEvolution &SE) const { + if (!isValid()) { + LLVM_DEBUG(dbgs() << "FC has invalid CFG requirements!\n"); + if (!Preheader) + InvalidPreheader++; + if (!Header) + InvalidHeader++; + if (!ExitingBlock) + InvalidExitingBlock++; + if (!ExitBlock) + InvalidExitBlock++; + if (!Latch) + InvalidLatch++; + if (L->isInvalid()) + InvalidLoop++; + + return false; + } + + // Require ScalarEvolution to be able to determine a trip count. + if (!SE.hasLoopInvariantBackedgeTakenCount(L)) { + LLVM_DEBUG(dbgs() << "Loop " << L->getName() + << " trip count not computable!\n"); + return reportInvalidCandidate(InvalidTripCount); + } + + if (!L->isLoopSimplifyForm()) { + LLVM_DEBUG(dbgs() << "Loop " << L->getName() + << " is not in simplified form!\n"); + return reportInvalidCandidate(NotSimplifiedForm); + } + + return true; + } + private: // This is only used internally for now, to clear the MemWrites and MemReads // list and setting Valid to false. 
I can't envision other uses of this right @@ -239,6 +280,17 @@ MemReads.clear(); Valid = false; } + + bool reportInvalidCandidate(llvm::Statistic &Stat) const { + using namespace ore; + assert(L && Preheader && "Fusion candidate not initialized properly!"); + Stat++; + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, Stat.getName(), + L->getStartLoc(), Preheader) + << "[" << Preheader->getParent()->getName() << "]: " + << "Loop is not a candidate for fusion: " << Stat.getDesc()); + return false; + } }; inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, @@ -391,16 +443,6 @@ } #endif -static void reportLoopFusion(const FusionCandidate &FC0, - const FusionCandidate &FC1, - OptimizationRemarkEmitter &ORE) { - using namespace ore; - ORE.emit( - OptimizationRemark(DEBUG_TYPE, "LoopFusion", FC0.Preheader->getParent()) - << "Fused " << NV("Cand1", StringRef(FC0.Preheader->getName())) - << " with " << NV("Cand2", StringRef(FC1.Preheader->getName()))); -} - struct LoopFuser { private: // Sets of control flow equivalent fusion candidates for a given nest level. @@ -506,53 +548,14 @@ return false; } - /// Determine if a fusion candidate (representing a loop) is eligible for - /// fusion. Note that this only checks whether a single loop can be fused - it - /// does not check whether it is *legal* to fuse two loops together. - bool eligibleForFusion(const FusionCandidate &FC) const { - if (!FC.isValid()) { - LLVM_DEBUG(dbgs() << "FC " << FC << " has invalid CFG requirements!\n"); - if (!FC.Preheader) - InvalidPreheader++; - if (!FC.Header) - InvalidHeader++; - if (!FC.ExitingBlock) - InvalidExitingBlock++; - if (!FC.ExitBlock) - InvalidExitBlock++; - if (!FC.Latch) - InvalidLatch++; - if (FC.L->isInvalid()) - InvalidLoop++; - - return false; - } - - // Require ScalarEvolution to be able to determine a trip count. 
- if (!SE.hasLoopInvariantBackedgeTakenCount(FC.L)) { - LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName() - << " trip count not computable!\n"); - InvalidTripCount++; - return false; - } - - if (!FC.L->isLoopSimplifyForm()) { - LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName() - << " is not in simplified form!\n"); - NotSimplifiedForm++; - return false; - } - - return true; - } /// Iterate over all loops in the given loop set and identify the loops that /// are eligible for fusion. Place all eligible fusion candidates into Control /// Flow Equivalent sets, sorted by dominance. void collectFusionCandidates(const LoopVector &LV) { for (Loop *L : LV) { - FusionCandidate CurrCand(L, &DT, &PDT); - if (!eligibleForFusion(CurrCand)) + FusionCandidate CurrCand(L, &DT, &PDT, ORE); + if (!CurrCand.eligibleForFusion(SE)) continue; // Go through each list in FusionCandidates and determine if L is control @@ -664,14 +667,14 @@ if (!identicalTripCounts(*FC0, *FC1)) { LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip " "counts. Not fusing.\n"); - NonEqualTripCount++; + reportNoFusion(*FC0, *FC1, NonEqualTripCount); continue; } if (!isAdjacent(*FC0, *FC1)) { LLVM_DEBUG(dbgs() << "Fusion candidates are not adjacent. Not fusing.\n"); - NonAdjacent++; + reportNoFusion(*FC0, *FC1, NonAdjacent); continue; } @@ -683,12 +686,13 @@ if (!isEmptyPreheader(*FC1)) { LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty " "preheader. Not fusing.\n"); - NonEmptyPreheader++; + reportNoFusion(*FC0, *FC1, NonEmptyPreheader); continue; } if (!dependencesAllowFusion(*FC0, *FC1)) { LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n"); + reportNoFusion(*FC0, *FC1, InvalidDependencies); continue; } @@ -696,9 +700,10 @@ LLVM_DEBUG(dbgs() << "\tFusion appears to be " << (BeneficialToFuse ? 
"" : "un") << "profitable!\n"); - if (!BeneficialToFuse) + if (!BeneficialToFuse) { + reportNoFusion(*FC0, *FC1, FusionNotBeneficial); continue; - + } // All analysis has completed and has determined that fusion is legal // and profitable. At this point, start transforming the code and // perform fusion. @@ -710,15 +715,15 @@ // Note this needs to be done *before* performFusion because // performFusion will change the original loops, making it not // possible to identify them after fusion is complete. - reportLoopFusion(*FC0, *FC1, ORE); + reportLoopFusion(*FC0, *FC1); - FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT); + FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT, ORE); FusedCand.verify(); - assert(eligibleForFusion(FusedCand) && + assert(FusedCand.eligibleForFusion(SE) && "Fused candidate should be eligible for fusion!"); // Notify the loop-depth-tree that these loops are not valid objects // anymore. LDT.removeLoop(FC1->L); CandidateSet.erase(FC0); @@ -1137,6 +1142,36 @@ return FC0.L; } + + void reportLoopFusion(const FusionCandidate &FC0, + const FusionCandidate &FC1) { + using namespace ore; + ORE.emit( + OptimizationRemark(DEBUG_TYPE, "LoopFusion", FC0.Preheader->getParent()) + << "Fused " << NV("Cand1", StringRef(FC0.Preheader->getName())) + << " with " << NV("Cand2", StringRef(FC1.Preheader->getName()))); + } + + /// Report missed loop fusion opportunity and the reason. + /// + /// This method will report a missed loop fusion opportunity using the + /// OptimizationRemarkEmitter object. 
It will print using the form: + /// <file>:<line>:<column>: [<function name>]: Could + /// not fuse <FC0 preheader name> with <FC1 preheader name>: <statistic description> + void reportNoFusion(const FusionCandidate &FC0, const FusionCandidate &FC1, + llvm::Statistic &Stat) { + using namespace ore; + + // Increment the statistic and add a report to the Optimization Remarks + Stat++; + ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, Stat.getName(), + FC0.L->getStartLoc(), FC0.Preheader) + << "[" << FC0.Preheader->getParent()->getName() << "]: " + << "Could not fuse " + << NV("Cand1", StringRef(FC0.Preheader->getName())) << " with " + << NV("Cand2", StringRef(FC1.Preheader->getName())) << ": " + << Stat.getDesc()); + } }; struct LoopFuseLegacy : public FunctionPass { diff --git a/llvm/test/Transforms/LoopFusion/diagnostics_analysis.ll b/llvm/test/Transforms/LoopFusion/diagnostics_analysis.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopFusion/diagnostics_analysis.ll @@ -0,0 +1,164 @@ +; RUN: opt -S -loop-fusion -pass-remarks-analysis=loop-fusion -disable-output < %s 2>&1 | FileCheck %s +; +; ModuleID = 'diagnostics_analysis.c' + +@B = common global [1024 x i32] zeroinitializer, align 16, !dbg !0 + +; CHECK: remark: diagnostics_analysis.c:6:3: [test]: Loop is not a candidate for fusion: Loop contains a volatile access +; CHECK: remark: diagnostics_analysis.c:10:3: [test]: Loop is not a candidate for fusion: Loop has unknown trip count +define void @test(i32* %A, i32 %n) !dbg !15 { +entry: + %A.addr = alloca i32*, align 8 + %n.addr = alloca i32, align 4 + %i = alloca i32, align 4 + %i1 = alloca i32, align 4 + store i32* %A, i32** %A.addr, align 8, !tbaa !27 + store i32 %n, i32* %n.addr, align 4, !tbaa !32 + %0 = bitcast i32* %i to i8*, !dbg !35 + store i32 0, i32* %i, align 4, !dbg !36, !tbaa !32 + br label %for.cond, !dbg !35 + +for.cond: ; preds = %for.inc, %entry + %1 = load i32, i32* %i, align 4, !dbg !37, !tbaa !32 + %2 = load i32, i32* %n.addr, align 4, !dbg !39, !tbaa !32 + %cmp = icmp slt i32 %1, %2, !dbg !40 + br i1 %cmp, label %for.body, 
label %for.cond.cleanup, !dbg !41 + +for.cond.cleanup: ; preds = %for.cond + %3 = bitcast i32* %i to i8*, !dbg !42 + br label %for.end + +for.body: ; preds = %for.cond + %4 = load i32, i32* %i, align 4, !dbg !43, !tbaa !32 + %sub = sub nsw i32 %4, 3, !dbg !43 + %5 = load i32, i32* %i, align 4, !dbg !43, !tbaa !32 + %add = add nsw i32 %5, 3, !dbg !43 + %mul = mul nsw i32 %sub, %add, !dbg !43 + %6 = load i32, i32* %i, align 4, !dbg !43, !tbaa !32 + %rem = srem i32 %mul, %6, !dbg !43 + %7 = load i32*, i32** %A.addr, align 8, !dbg !45, !tbaa !27 + %8 = load i32, i32* %i, align 4, !dbg !46, !tbaa !32 + %idxprom = sext i32 %8 to i64, !dbg !45 + %arrayidx = getelementptr inbounds i32, i32* %7, i64 %idxprom, !dbg !45 + store volatile i32 %rem, i32* %arrayidx, align 4, !dbg !47, !tbaa !32 + br label %for.inc, !dbg !48 + +for.inc: ; preds = %for.body + %9 = load i32, i32* %i, align 4, !dbg !49, !tbaa !32 + %inc = add nsw i32 %9, 1, !dbg !49 + store i32 %inc, i32* %i, align 4, !dbg !49, !tbaa !32 + br label %for.cond, !dbg !42, !llvm.loop !50 + +for.end: ; preds = %for.cond.cleanup + %10 = bitcast i32* %i1 to i8*, !dbg !52 + store i32 0, i32* %i1, align 4, !dbg !53, !tbaa !32 + br label %for.cond2, !dbg !52 + +for.cond2: ; preds = %for.inc12, %for.end + %11 = load i32, i32* %i1, align 4, !dbg !54, !tbaa !32 + %12 = load i32, i32* %n.addr, align 4, !dbg !56, !tbaa !32 + %cmp3 = icmp slt i32 %11, %12, !dbg !57 + br i1 %cmp3, label %for.body5, label %for.cond.cleanup4, !dbg !58 + +for.cond.cleanup4: ; preds = %for.cond2 + %13 = bitcast i32* %i1 to i8*, !dbg !59 + br label %for.end14 + +for.body5: ; preds = %for.cond2 + %14 = load i32, i32* %i1, align 4, !dbg !60, !tbaa !32 + %sub6 = sub nsw i32 %14, 3, !dbg !60 + %15 = load i32, i32* %i1, align 4, !dbg !60, !tbaa !32 + %add7 = add nsw i32 %15, 3, !dbg !60 + %mul8 = mul nsw i32 %sub6, %add7, !dbg !60 + %16 = load i32, i32* %i1, align 4, !dbg !60, !tbaa !32 + %rem9 = srem i32 %mul8, %16, !dbg !60 + %17 = load i32, i32* %i1, align 
4, !dbg !62, !tbaa !32 + %idxprom10 = sext i32 %17 to i64, !dbg !63 + %arrayidx11 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %idxprom10, !dbg !63 + store i32 %rem9, i32* %arrayidx11, align 4, !dbg !64, !tbaa !32 + br label %for.inc12, !dbg !65 + +for.inc12: ; preds = %for.body5 + %18 = load i32, i32* %i1, align 4, !dbg !66, !tbaa !32 + %inc13 = add nsw i32 %18, 1, !dbg !66 + store i32 %inc13, i32* %i1, align 4, !dbg !66, !tbaa !32 + br label %for.cond2, !dbg !59, !llvm.loop !67 + +for.end14: ; preds = %for.cond.cleanup4 + ret void, !dbg !69 +} + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!10, !11, !12, !13} +!llvm.ident = !{!14} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "B", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 9.0.0 (git@github.ibm.com:compiler/llvm-project.git c019c32c5a2b0ed4487a738337d35fd3f630ac0a)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, nameTableKind: GNU) +!3 = !DIFile(filename: "diagnostics_analysis.c", directory: "/tmp") +!4 = !{} +!5 = !{!0} +!6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 32768, elements: !8) +!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!8 = !{!9} +!9 = !DISubrange(count: 1024) +!10 = !{i32 2, !"Dwarf Version", i32 4} +!11 = !{i32 2, !"Debug Info Version", i32 3} +!12 = !{i32 1, !"wchar_size", i32 4} +!13 = !{i32 7, !"PIC Level", i32 2} +!14 = !{!"clang version 9.0.0 (git@github.ibm.com:compiler/llvm-project.git c019c32c5a2b0ed4487a738337d35fd3f630ac0a)"} +!15 = distinct !DISubprogram(name: "test", scope: !3, file: !3, line: 5, type: !16, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !20) +!16 = !DISubroutineType(types: !17) +!17 = !{null, 
!18, !7} +!18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !19, size: 64) +!19 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !7) +!20 = !{!21, !22, !23, !25} +!21 = !DILocalVariable(name: "A", arg: 1, scope: !15, file: !3, line: 5, type: !18) +!22 = !DILocalVariable(name: "n", arg: 2, scope: !15, file: !3, line: 5, type: !7) +!23 = !DILocalVariable(name: "i", scope: !24, file: !3, line: 6, type: !7) +!24 = distinct !DILexicalBlock(scope: !15, file: !3, line: 6, column: 3) +!25 = !DILocalVariable(name: "i", scope: !26, file: !3, line: 10, type: !7) +!26 = distinct !DILexicalBlock(scope: !15, file: !3, line: 10, column: 3) +!27 = !{!28, !28, i64 0} +!28 = !{!"any pointer", !29, i64 0} +!29 = !{!"omnipotent char", !30, i64 0} +!30 = !{!"Simple C/C++ TBAA"} +!31 = !DILocation(line: 5, column: 25, scope: !15) +!32 = !{!33, !33, i64 0} +!33 = !{!"int", !29, i64 0} +!34 = !DILocation(line: 5, column: 32, scope: !15) +!35 = !DILocation(line: 6, column: 8, scope: !24) +!36 = !DILocation(line: 6, column: 12, scope: !24) +!37 = !DILocation(line: 6, column: 19, scope: !38) +!38 = distinct !DILexicalBlock(scope: !24, file: !3, line: 6, column: 3) +!39 = !DILocation(line: 6, column: 23, scope: !38) +!40 = !DILocation(line: 6, column: 21, scope: !38) +!41 = !DILocation(line: 6, column: 3, scope: !24) +!42 = !DILocation(line: 6, column: 3, scope: !38) +!43 = !DILocation(line: 7, column: 12, scope: !44) +!44 = distinct !DILexicalBlock(scope: !38, file: !3, line: 6, column: 31) +!45 = !DILocation(line: 7, column: 5, scope: !44) +!46 = !DILocation(line: 7, column: 7, scope: !44) +!47 = !DILocation(line: 7, column: 10, scope: !44) +!48 = !DILocation(line: 8, column: 3, scope: !44) +!49 = !DILocation(line: 6, column: 27, scope: !38) +!50 = distinct !{!50, !41, !51} +!51 = !DILocation(line: 8, column: 3, scope: !24) +!52 = !DILocation(line: 10, column: 8, scope: !26) +!53 = !DILocation(line: 10, column: 12, scope: !26) +!54 = !DILocation(line: 10, column: 19, scope: !55) 
+!55 = distinct !DILexicalBlock(scope: !26, file: !3, line: 10, column: 3) +!56 = !DILocation(line: 10, column: 23, scope: !55) +!57 = !DILocation(line: 10, column: 21, scope: !55) +!58 = !DILocation(line: 10, column: 3, scope: !26) +!59 = !DILocation(line: 10, column: 3, scope: !55) +!60 = !DILocation(line: 11, column: 12, scope: !61) +!61 = distinct !DILexicalBlock(scope: !55, file: !3, line: 10, column: 31) +!62 = !DILocation(line: 11, column: 7, scope: !61) +!63 = !DILocation(line: 11, column: 5, scope: !61) +!64 = !DILocation(line: 11, column: 10, scope: !61) +!65 = !DILocation(line: 12, column: 3, scope: !61) +!66 = !DILocation(line: 10, column: 27, scope: !55) +!67 = distinct !{!67, !58, !68} +!68 = !DILocation(line: 12, column: 3, scope: !26) +!69 = !DILocation(line: 13, column: 1, scope: !15) diff --git a/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll b/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll @@ -0,0 +1,395 @@ +; RUN: opt -S -loop-fusion -pass-remarks-missed=loop-fusion -disable-output < %s 2>&1 | FileCheck %s +; +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +@B = common global [1024 x i32] zeroinitializer, align 16, !dbg !0 + +; CHECK: remark: diagnostics_missed.c:18:3: [non_adjacent]: Could not fuse entry with for.end: Loops are not adjacent +define void @non_adjacent(i32* noalias %A) !dbg !67 { +entry: + br label %for.cond, !dbg !77 + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ], !dbg !76 + %exitcond1 = icmp ne i64 %i.0, 100, !dbg !78 + br i1 %exitcond1, label %for.body, label %for.cond.cleanup, !dbg !80 + +for.cond.cleanup: ; preds = %for.cond + br label %for.end + +for.body: ; preds = %for.cond + %sub = add nsw i64 %i.0, -3, !dbg !81 + %add = add nuw nsw i64 %i.0, 3, !dbg !81 + %mul = mul nsw i64 %sub, %add, !dbg !81 + %rem = srem i64 %mul, %i.0, !dbg !81 + %conv = trunc i64 
%rem to i32, !dbg !81 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.0, !dbg !83 + store i32 %conv, i32* %arrayidx, align 4, !dbg !84, !tbaa !39 + br label %for.inc, !dbg !85 + +for.inc: ; preds = %for.body + %inc = add nuw nsw i64 %i.0, 1, !dbg !86 + br label %for.cond, !dbg !87, !llvm.loop !88 + +for.end: ; preds = %for.cond.cleanup + br label %for.cond2, !dbg !91 + +for.cond2: ; preds = %for.inc13, %for.end + %i1.0 = phi i64 [ 0, %for.end ], [ %inc14, %for.inc13 ], !dbg !90 + %exitcond = icmp ne i64 %i1.0, 100, !dbg !92 + br i1 %exitcond, label %for.body6, label %for.cond.cleanup5, !dbg !94 + +for.cond.cleanup5: ; preds = %for.cond2 + br label %for.end15 + +for.body6: ; preds = %for.cond2 + %sub7 = add nsw i64 %i1.0, -3, !dbg !95 + %add8 = add nuw nsw i64 %i1.0, 3, !dbg !95 + %mul9 = mul nsw i64 %sub7, %add8, !dbg !95 + %rem10 = srem i64 %mul9, %i1.0, !dbg !95 + %conv11 = trunc i64 %rem10 to i32, !dbg !95 + %arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.0, !dbg !97 + store i32 %conv11, i32* %arrayidx12, align 4, !dbg !98, !tbaa !39 + br label %for.inc13, !dbg !99 + +for.inc13: ; preds = %for.body6 + %inc14 = add nuw nsw i64 %i1.0, 1, !dbg !100 + br label %for.cond2, !dbg !101, !llvm.loop !102 + +for.end15: ; preds = %for.cond.cleanup5 + ret void, !dbg !104 +} + + +; CHECK: remark: diagnostics_missed.c:28:3: [different_bounds]: Could not fuse entry with for.end: Loop trip counts are not the same +define void @different_bounds(i32* noalias %A) !dbg !105 { +entry: + br label %for.cond, !dbg !114 + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ], !dbg !113 + %exitcond1 = icmp ne i64 %i.0, 100, !dbg !115 + br i1 %exitcond1, label %for.body, label %for.cond.cleanup, !dbg !117 + +for.cond.cleanup: ; preds = %for.cond + br label %for.end + +for.body: ; preds = %for.cond + %sub = add nsw i64 %i.0, -3, !dbg !118 + %add = add nuw nsw i64 %i.0, 3, !dbg !118 + %mul = mul nsw i64 %sub, 
%add, !dbg !118 + %rem = srem i64 %mul, %i.0, !dbg !118 + %conv = trunc i64 %rem to i32, !dbg !118 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.0, !dbg !120 + store i32 %conv, i32* %arrayidx, align 4, !dbg !121, !tbaa !39 + br label %for.inc, !dbg !122 + +for.inc: ; preds = %for.body + %inc = add nuw nsw i64 %i.0, 1, !dbg !123 + br label %for.cond, !dbg !124, !llvm.loop !125 + +for.end: ; preds = %for.cond.cleanup + br label %for.cond2, !dbg !128 + +for.cond2: ; preds = %for.inc13, %for.end + %i1.0 = phi i64 [ 0, %for.end ], [ %inc14, %for.inc13 ], !dbg !127 + %exitcond = icmp ne i64 %i1.0, 200, !dbg !129 + br i1 %exitcond, label %for.body6, label %for.cond.cleanup5, !dbg !131 + +for.cond.cleanup5: ; preds = %for.cond2 + br label %for.end15 + +for.body6: ; preds = %for.cond2 + %sub7 = add nsw i64 %i1.0, -3, !dbg !132 + %add8 = add nuw nsw i64 %i1.0, 3, !dbg !132 + %mul9 = mul nsw i64 %sub7, %add8, !dbg !132 + %rem10 = srem i64 %mul9, %i1.0, !dbg !132 + %conv11 = trunc i64 %rem10 to i32, !dbg !132 + %arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.0, !dbg !134 + store i32 %conv11, i32* %arrayidx12, align 4, !dbg !135, !tbaa !39 + br label %for.inc13, !dbg !136 + +for.inc13: ; preds = %for.body6 + %inc14 = add nuw nsw i64 %i1.0, 1, !dbg !137 + br label %for.cond2, !dbg !138, !llvm.loop !139 + +for.end15: ; preds = %for.cond.cleanup5 + ret void, !dbg !141 +} + +; CHECK: remark: diagnostics_missed.c:38:3: [negative_dependence]: Could not fuse entry with for.end: Loop has a non-empty preheader +define void @negative_dependence(i32* noalias %A) !dbg !142 { +entry: + br label %for.cond, !dbg !151 + +for.cond: ; preds = %for.inc, %entry + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc ], [ 0, %entry ], !dbg !150 + %exitcond3 = icmp ne i64 %indvars.iv1, 100, !dbg !152 + br i1 %exitcond3, label %for.body, label %for.end, !dbg !154 + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32, i32* %A, i64 
%indvars.iv1, !dbg !155 + %tmp = trunc i64 %indvars.iv1 to i32, !dbg !157 + store i32 %tmp, i32* %arrayidx, align 4, !dbg !157, !tbaa !39 + br label %for.inc, !dbg !158 + +for.inc: ; preds = %for.body + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1, !dbg !159 + br label %for.cond, !dbg !160, !llvm.loop !161 + +for.end: ; preds = %for.cond + call void @llvm.dbg.value(metadata i32 0, metadata !147, metadata !DIExpression()), !dbg !163 + br label %for.cond2, !dbg !164 + +for.cond2: ; preds = %for.inc10, %for.end + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc10 ], [ 0, %for.end ], !dbg !163 + %exitcond = icmp ne i64 %indvars.iv, 100, !dbg !165 + br i1 %exitcond, label %for.body5, label %for.end12, !dbg !167 + +for.body5: ; preds = %for.cond2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !168 + %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next, !dbg !169 + %tmp4 = load i32, i32* %arrayidx7, align 4, !dbg !169, !tbaa !39 + %mul = shl nsw i32 %tmp4, 1, !dbg !171 + %arrayidx9 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv, !dbg !172 + store i32 %mul, i32* %arrayidx9, align 4, !dbg !173, !tbaa !39 + br label %for.inc10, !dbg !174 + +for.inc10: ; preds = %for.body5 + br label %for.cond2, !dbg !175, !llvm.loop !176 + +for.end12: ; preds = %for.cond2 
+ ret void, !dbg !178 +} + +; CHECK: remark: diagnostics_missed.c:51:3: [sumTest]: Could not fuse entry with for.cond2.preheader: Dependencies prevent fusion +define i32 @sumTest(i32* noalias %A) !dbg !179 { +entry: + br label %for.cond, !dbg !191 + +for.cond: ; preds = %for.inc, %entry + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc ], [ 0, %entry ], !dbg !190 + %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ], !dbg !189 + %exitcond3 = icmp ne i64 %indvars.iv1, 100, !dbg !192 + br i1 %exitcond3, label %for.body, label %for.cond2, !dbg !194 + +for.body: ; preds = %for.cond + br label %for.inc, !dbg !195 + +for.inc: ; preds = %for.body + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1, !dbg !196 + %tmp = load i32, i32* %arrayidx, align 4, !dbg !196, !tbaa !39 + %add = add nsw i32 %sum.0, %tmp, !dbg !197 + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1, !dbg !198 + br label %for.cond, !dbg !199, !llvm.loop !200 + +for.cond2: ; preds = %for.inc10, %for.cond + %sum.0.lcssa = phi i32 [ %sum.0, %for.cond ], [ %sum.0.lcssa, %for.inc10 ], !dbg !189 + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc10 ], [ 0, %for.cond ], !dbg !202 + %exitcond = icmp ne i64 %indvars.iv, 100, !dbg !204 + br i1 %exitcond, label %for.body5, label %for.end12, !dbg !206 + +for.body5: ; preds = %for.cond2 + %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !207 + %tmp4 = load i32, i32* %arrayidx7, align 4, !dbg !207, !tbaa !39 + %div = sdiv i32 %tmp4, %sum.0.lcssa, !dbg !208 + %arrayidx9 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv, !dbg !209 + store i32 %div, i32* %arrayidx9, align 4, !dbg !210, !tbaa !39 + br label %for.inc10, !dbg !209 + +for.inc10: ; preds = %for.body5 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !211 + br label %for.cond2, !dbg !212, !llvm.loop !213 + +for.end12: ; preds = %for.cond2 + ret i32 %sum.0.lcssa, !dbg !215 +} + +declare void @llvm.dbg.value(metadata, metadata, 
metadata) + + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!11, !12, !13, !14} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "B", scope: !2, file: !6, line: 46, type: !7, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 9.0.0 (git@github.ibm.com:compiler/llvm-project.git 23c4baaa9f5b33d2d52eda981d376c6b0a7a3180)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, nameTableKind: GNU) +!3 = !DIFile(filename: "diagnostics_missed.c", directory: "/tmp") +!4 = !{} +!5 = !{!0} +!6 = !DIFile(filename: "diagnostics_missed.c", directory: "/tmp") +!7 = !DICompositeType(tag: DW_TAG_array_type, baseType: !8, size: 32768, elements: !9) +!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!9 = !{!10} +!10 = !DISubrange(count: 1024) +!11 = !{i32 2, !"Dwarf Version", i32 4} +!12 = !{i32 2, !"Debug Info Version", i32 3} +!13 = !{i32 1, !"wchar_size", i32 4} +!14 = !{i32 7, !"PIC Level", i32 2} +!16 = distinct !DISubprogram(name: "non_cfe", scope: !6, file: !6, line: 5, type: !17, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21) +!17 = !DISubroutineType(types: !18) +!18 = !{null, !19} +!19 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !20) +!20 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64) +!21 = !{!22, !23, !25} +!22 = !DILocalVariable(name: "A", arg: 1, scope: !16, file: !6, line: 5, type: !19) +!23 = !DILocalVariable(name: "i", scope: !24, file: !6, line: 6, type: !8) +!24 = distinct !DILexicalBlock(scope: !16, file: !6, line: 6, column: 3) +!25 = !DILocalVariable(name: "i", scope: !26, file: !6, line: 11, type: !8) +!26 = distinct !DILexicalBlock(scope: !27, file: !6, line: 11, column: 5) +!27 = distinct !DILexicalBlock(scope: !28, file: !6, line: 10, column: 17) +!28 = 
distinct !DILexicalBlock(scope: !16, file: !6, line: 10, column: 7) +!39 = !{!40, !40, i64 0} +!40 = !{!"int", !41, i64 0} +!41 = !{!"omnipotent char", !42, i64 0} +!42 = !{!"Simple C/C++ TBAA"} + +!67 = distinct !DISubprogram(name: "non_adjacent", scope: !6, file: !6, line: 17, type: !17, scopeLine: 17, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !68) +!68 = !{!69, !70, !73} +!69 = !DILocalVariable(name: "A", arg: 1, scope: !67, file: !6, line: 17, type: !19) +!70 = !DILocalVariable(name: "i", scope: !71, file: !6, line: 18, type: !72) +!71 = distinct !DILexicalBlock(scope: !67, file: !6, line: 18, column: 3) +!72 = !DIBasicType(name: "long int", size: 64, encoding: DW_ATE_signed) +!73 = !DILocalVariable(name: "i", scope: !74, file: !6, line: 22, type: !72) +!74 = distinct !DILexicalBlock(scope: !67, file: !6, line: 22, column: 3) +!75 = !DILocation(line: 0, scope: !67) +!76 = !DILocation(line: 0, scope: !71) +!77 = !DILocation(line: 18, column: 8, scope: !71) +!78 = !DILocation(line: 18, column: 22, scope: !79) +!79 = distinct !DILexicalBlock(scope: !71, file: !6, line: 18, column: 3) +!80 = !DILocation(line: 18, column: 3, scope: !71) +!81 = !DILocation(line: 19, column: 12, scope: !82) +!82 = distinct !DILexicalBlock(scope: !79, file: !6, line: 18, column: 34) +!83 = !DILocation(line: 19, column: 5, scope: !82) +!84 = !DILocation(line: 19, column: 10, scope: !82) +!85 = !DILocation(line: 20, column: 3, scope: !82) +!86 = !DILocation(line: 18, column: 30, scope: !79) +!87 = !DILocation(line: 18, column: 3, scope: !79) +!88 = distinct !{!88, !80, !89} +!89 = !DILocation(line: 20, column: 3, scope: !71) +!90 = !DILocation(line: 0, scope: !74) +!91 = !DILocation(line: 22, column: 8, scope: !74) +!92 = !DILocation(line: 22, column: 22, scope: !93) +!93 = distinct !DILexicalBlock(scope: !74, file: !6, line: 22, column: 3) +!94 = !DILocation(line: 22, column: 3, scope: !74) +!95 = 
!DILocation(line: 23, column: 12, scope: !96) +!96 = distinct !DILexicalBlock(scope: !93, file: !6, line: 22, column: 34) +!97 = !DILocation(line: 23, column: 5, scope: !96) +!98 = !DILocation(line: 23, column: 10, scope: !96) +!99 = !DILocation(line: 24, column: 3, scope: !96) +!100 = !DILocation(line: 22, column: 30, scope: !93) +!101 = !DILocation(line: 22, column: 3, scope: !93) +!102 = distinct !{!102, !94, !103} +!103 = !DILocation(line: 24, column: 3, scope: !74) +!104 = !DILocation(line: 25, column: 1, scope: !67) +!105 = distinct !DISubprogram(name: "different_bounds", scope: !6, file: !6, line: 27, type: !17, scopeLine: 27, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !106) +!106 = !{!107, !108, !110} +!107 = !DILocalVariable(name: "A", arg: 1, scope: !105, file: !6, line: 27, type: !19) +!108 = !DILocalVariable(name: "i", scope: !109, file: !6, line: 28, type: !72) +!109 = distinct !DILexicalBlock(scope: !105, file: !6, line: 28, column: 3) +!110 = !DILocalVariable(name: "i", scope: !111, file: !6, line: 32, type: !72) +!111 = distinct !DILexicalBlock(scope: !105, file: !6, line: 32, column: 3) +!112 = !DILocation(line: 0, scope: !105) +!113 = !DILocation(line: 0, scope: !109) +!114 = !DILocation(line: 28, column: 8, scope: !109) +!115 = !DILocation(line: 28, column: 22, scope: !116) +!116 = distinct !DILexicalBlock(scope: !109, file: !6, line: 28, column: 3) +!117 = !DILocation(line: 28, column: 3, scope: !109) +!118 = !DILocation(line: 29, column: 12, scope: !119) +!119 = distinct !DILexicalBlock(scope: !116, file: !6, line: 28, column: 34) +!120 = !DILocation(line: 29, column: 5, scope: !119) +!121 = !DILocation(line: 29, column: 10, scope: !119) +!122 = !DILocation(line: 30, column: 3, scope: !119) +!123 = !DILocation(line: 28, column: 30, scope: !116) +!124 = !DILocation(line: 28, column: 3, scope: !116) +!125 = distinct !{!125, !117, !126} +!126 = !DILocation(line: 30, 
column: 3, scope: !109) +!127 = !DILocation(line: 0, scope: !111) +!128 = !DILocation(line: 32, column: 8, scope: !111) +!129 = !DILocation(line: 32, column: 22, scope: !130) +!130 = distinct !DILexicalBlock(scope: !111, file: !6, line: 32, column: 3) +!131 = !DILocation(line: 32, column: 3, scope: !111) +!132 = !DILocation(line: 33, column: 12, scope: !133) +!133 = distinct !DILexicalBlock(scope: !130, file: !6, line: 32, column: 34) +!134 = !DILocation(line: 33, column: 5, scope: !133) +!135 = !DILocation(line: 33, column: 10, scope: !133) +!136 = !DILocation(line: 34, column: 3, scope: !133) +!137 = !DILocation(line: 32, column: 30, scope: !130) +!138 = !DILocation(line: 32, column: 3, scope: !130) +!139 = distinct !{!139, !131, !140} +!140 = !DILocation(line: 34, column: 3, scope: !111) +!141 = !DILocation(line: 35, column: 1, scope: !105) +!142 = distinct !DISubprogram(name: "negative_dependence", scope: !6, file: !6, line: 37, type: !17, scopeLine: 37, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !143) +!143 = !{!144, !145, !147} +!144 = !DILocalVariable(name: "A", arg: 1, scope: !142, file: !6, line: 37, type: !19) +!145 = !DILocalVariable(name: "i", scope: !146, file: !6, line: 38, type: !8) +!146 = distinct !DILexicalBlock(scope: !142, file: !6, line: 38, column: 3) +!147 = !DILocalVariable(name: "i", scope: !148, file: !6, line: 42, type: !8) +!148 = distinct !DILexicalBlock(scope: !142, file: !6, line: 42, column: 3) +!149 = !DILocation(line: 0, scope: !142) +!150 = !DILocation(line: 0, scope: !146) +!151 = !DILocation(line: 38, column: 8, scope: !146) +!152 = !DILocation(line: 38, column: 21, scope: !153) +!153 = distinct !DILexicalBlock(scope: !146, file: !6, line: 38, column: 3) +!154 = !DILocation(line: 38, column: 3, scope: !146) +!155 = !DILocation(line: 39, column: 5, scope: !156) +!156 = distinct !DILexicalBlock(scope: !153, file: !6, line: 38, column: 33) +!157 = 
!DILocation(line: 39, column: 10, scope: !156) +!158 = !DILocation(line: 40, column: 3, scope: !156) +!159 = !DILocation(line: 38, column: 29, scope: !153) +!160 = !DILocation(line: 38, column: 3, scope: !153) +!161 = distinct !{!161, !154, !162} +!162 = !DILocation(line: 40, column: 3, scope: !146) +!163 = !DILocation(line: 0, scope: !148) +!164 = !DILocation(line: 42, column: 8, scope: !148) +!165 = !DILocation(line: 42, column: 21, scope: !166) +!166 = distinct !DILexicalBlock(scope: !148, file: !6, line: 42, column: 3) +!167 = !DILocation(line: 42, column: 3, scope: !148) +!168 = !DILocation(line: 42, column: 29, scope: !166) +!169 = !DILocation(line: 43, column: 16, scope: !170) +!170 = distinct !DILexicalBlock(scope: !166, file: !6, line: 42, column: 33) +!171 = !DILocation(line: 43, column: 14, scope: !170) +!172 = !DILocation(line: 43, column: 5, scope: !170) +!173 = !DILocation(line: 43, column: 10, scope: !170) +!174 = !DILocation(line: 44, column: 3, scope: !170) +!175 = !DILocation(line: 42, column: 3, scope: !166) +!176 = distinct !{!176, !167, !177} +!177 = !DILocation(line: 44, column: 3, scope: !148) +!178 = !DILocation(line: 45, column: 1, scope: !142) +!179 = distinct !DISubprogram(name: "sumTest", scope: !6, file: !6, line: 48, type: !180, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !182) +!180 = !DISubroutineType(types: !181) +!181 = !{!8, !19} +!182 = !{!183, !184, !185, !187} +!183 = !DILocalVariable(name: "A", arg: 1, scope: !179, file: !6, line: 48, type: !19) +!184 = !DILocalVariable(name: "sum", scope: !179, file: !6, line: 49, type: !8) +!185 = !DILocalVariable(name: "i", scope: !186, file: !6, line: 51, type: !8) +!186 = distinct !DILexicalBlock(scope: !179, file: !6, line: 51, column: 3) +!187 = !DILocalVariable(name: "i", scope: !188, file: !6, line: 54, type: !8) +!188 = distinct !DILexicalBlock(scope: !179, file: !6, line: 54, column: 3) 
+!189 = !DILocation(line: 0, scope: !179) +!190 = !DILocation(line: 0, scope: !186) +!191 = !DILocation(line: 51, column: 7, scope: !186) +!192 = !DILocation(line: 51, column: 20, scope: !193) +!193 = distinct !DILexicalBlock(scope: !186, file: !6, line: 51, column: 3) +!194 = !DILocation(line: 51, column: 3, scope: !186) +!195 = !DILocation(line: 52, column: 5, scope: !193) +!196 = !DILocation(line: 52, column: 12, scope: !193) +!197 = !DILocation(line: 52, column: 9, scope: !193) +!198 = !DILocation(line: 51, column: 28, scope: !193) +!199 = !DILocation(line: 51, column: 3, scope: !193) +!200 = distinct !{!200, !194, !201} +!201 = !DILocation(line: 52, column: 15, scope: !186) +!202 = !DILocation(line: 0, scope: !188) +!203 = !DILocation(line: 54, column: 7, scope: !188) +!204 = !DILocation(line: 54, column: 20, scope: !205) +!205 = distinct !DILexicalBlock(scope: !188, file: !6, line: 54, column: 3) +!206 = !DILocation(line: 54, column: 3, scope: !188) +!207 = !DILocation(line: 55, column: 12, scope: !205) +!208 = !DILocation(line: 55, column: 16, scope: !205) +!209 = !DILocation(line: 55, column: 5, scope: !205) +!210 = !DILocation(line: 55, column: 10, scope: !205) +!211 = !DILocation(line: 54, column: 28, scope: !205) +!212 = !DILocation(line: 54, column: 3, scope: !205) +!213 = distinct !{!213, !206, !214} +!214 = !DILocation(line: 55, column: 17, scope: !188) +!215 = !DILocation(line: 57, column: 3, scope: !179)