Index: llvm/lib/Transforms/Scalar/GVN.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVN.cpp +++ llvm/lib/Transforms/Scalar/GVN.cpp @@ -2746,25 +2746,78 @@ // For the dead blocks' live successors, update their phi nodes by replacing // the operands corresponding to dead blocks with UndefVal. - for(SmallSetVector::iterator I = DF.begin(), E = DF.end(); - I != E; I++) { - BasicBlock *B = *I; + while (!DF.empty()) { + BasicBlock *B = DF.pop_back_val(); if (DeadBlocks.count(B)) continue; // First, split the critical edges. This might also create additional blocks // to preserve LoopSimplify form and adjust edges accordingly. SmallVector Preds(pred_begin(B), pred_end(B)); + SmallVector Candidates; for (BasicBlock *P : Preds) { if (!DeadBlocks.count(P)) continue; - if (llvm::any_of(successors(P), - [B](BasicBlock *Succ) { return Succ == B; }) && - isCriticalEdge(P->getTerminator(), B)) { - if (BasicBlock *S = splitCriticalEdges(P, B)) - DeadBlocks.insert(P = S); - } + if (isCriticalEdge(P->getTerminator(), B)) + Candidates.push_back(P); + } + + // - - - - - - - - + // | ... ... | + // |[D1] | => A simplified loop + // | | \ | + // | | [D2] [B1] | + // -|- - |- - |- - + // | | | + // \ | / + // \ | / + // [ DF1 ] => exit block for loop + // + // Consider the above scenario, there're two dead blocks [D1] + [D2], and + // a live block[B1]([D1][D2][B1] in one loop). [DF1] is a common dominance + // frontier block for [D1] and [D2], so Candidates will contain two elements + // [D1, D2]. [D1] has two succs: [D2] and [DF1], for [D1], llvm::any_of + // condition is true, so splitCriticalEdges will be called. + // + // - - - - - - - - + // | ... ... | + // |[D1] | => A simplified loop + // | | \ | + // | | [D2] [B1] | + // -|- - |- - |- - + // | | | + // | \ / + // [D3] [DF2] => New Exit block[DF2] for loop + // \ | + // [ DF1 ] + // + // SplitCriticalEdges should return true, so a new crit_edge block[D3] will + // be created, and added into dead blocks. Also, a new loop exit block[DF2] + // is created in order to preserve LoopSimplify. After that, llvm::any_of + // condition is false for [D2], and we need to add [DF2] as a new dominance + // frontier block for [D2] since it has a pred edge to non-dead block[B1], + // and then, we need to update the undef incoming value for phi in [DF2]. + for (BasicBlock *C : Candidates) { + if (!llvm::any_of(successors(C), + [B](BasicBlock *Succ) { return Succ == B; })) { + std::set Succs; + std::copy_if(succ_begin(C), succ_end(C), + std::inserter(Succs, Succs.begin()), + [&](BasicBlock *Succ) { return !DeadBlocks.count(Succ); }); + std::set Preds; + std::copy_if(pred_begin(B), pred_end(B), + std::inserter(Preds, Preds.begin()), + [&](BasicBlock *Pred) { return !DeadBlocks.count(Pred); }); + std::vector NewBlocks; + std::set_intersection(Succs.begin(), Succs.end(), Preds.begin(), + Preds.end(), + std::inserter(NewBlocks, NewBlocks.begin())); + assert(NewBlocks.size() == 1 && + "Invalid number of blocks between candidate and successor."); + DF.insert(NewBlocks.back()); + } else if (BasicBlock *S = splitCriticalEdges(C, B)) + DeadBlocks.insert(C = S); } // Now undef the incoming values from the dead predecessors. Index: llvm/test/Transforms/GVN/pr47462.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GVN/pr47462.ll @@ -0,0 +1,202 @@ +; RUN: opt < %s -basic-aa -gvn -enable-load-pre -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes='loop(loop-simplifycfg),gvn' -enable-load-pre -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.b = type { i32 } + +@a = common local_unnamed_addr global i32 0, align 4 +@b = common local_unnamed_addr global i32 0, align 4 +@c = common local_unnamed_addr global i32* null, align 8 +@f = common local_unnamed_addr global i32 0, align 4 +@e = common local_unnamed_addr global i32 0, align 4 +@g = common local_unnamed_addr global i32 0, align 4 +@d = common local_unnamed_addr global i32 0, align 4 + +; Should have no load in for.end. +; CHECK-LABEL: @test +; CHECK: for.end.loopexitsplitsplit: +; CHECK: for.end: +; CHECK-NOT: load + +define i32 @test() { +entry: + %local_array = alloca [3 x i32], align 4 + store i32 0, i32* @a, align 4 + %i = load i32, i32* @b, align 4 + %toboo1 = icmp eq i32 %i, 0 + br i1 %toboo1, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + %i1 = load i32*, i32** @c, align 8 + br label %for.body + +for.body: ; preds = %for.inc, %for.body.lr.ph + %g.0 = phi i32* [ @a, %for.body.lr.ph ], [ %arrayidx.le, %for.inc ] + %i2 = load i32, i32* %i1, align 4 + %toboo2 = icmp eq i32 %i2, 0 + br i1 %toboo2, label %if.end, label %for.end.loopexit + +if.end: ; preds = %for.body + %conv = trunc i64 0 to i32 + store i32 %conv, i32* %g.0, align 4 + br i1 true, label %cleanup, label %for.cond.preheader + +for.cond.preheader: ; preds = %if.end + %idxprom.le = zext i32 2 to i64 + %arrayidx.le = getelementptr inbounds [3 x i32], [3 x i32]* %local_array, i64 0, i64 %idxprom.le + %toboo3 = icmp eq i64 0, 0 + br i1 %toboo3, label %for.inc, label %for.end.loopexit + +for.inc: ; preds = %for.cond.preheader + %i3 = load i32, i32* @b, align 4 + %dec = add nsw i32 %i3, -1 + store i32 %dec, i32* @b, align 4 + %toboo4 = icmp eq i32 %dec, 0 + br i1 %toboo4, label %for.end.loopexit, label %for.body + +for.end.loopexit: ; preds = %for.inc, %for.cond.preheader, %for.body + %g.1 = phi i32* [ %arrayidx.le, %for.inc ], [ %g.0, %for.body ], [ %arrayidx.le, %for.cond.preheader ] + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + %g.2 = phi i32* [ @a, %entry ], [ %g.1, %for.end.loopexit ] + %i4 = load i32, i32* %g.2, align 4 + store i32 %i4, i32* @a, align 4 + br label %cleanup + +cleanup: ; preds = %for.end, %if.end + ret i32 0 +} + +; CHECK-LABEL: @test1 +; CHECK: entry: +; CHECK: load +; CHECK-NEXT: load +; CHECK-NEXT: load +; CHECK: for.cond: +; CHECK-NOT: load +; CHECK: if.end10: +; CHECK: load + +define i32 @test1() local_unnamed_addr { +entry: + %i = alloca [60 x %struct.b], align 4 + %i1 = bitcast [60 x %struct.b]* %i to i8* + call void @llvm.lifetime.start.p0i8(i64 240, i8* nonnull %i1) + %i2 = load i32, i32* @a, align 4 + %i3 = load i32, i32* @d, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc17.1, %entry + br label %for.body + +for.body: ; preds = %for.cond + %i4 = load i32, i32* @e, align 4 + %tobool = icmp eq i32 %i4, 0 + br i1 %tobool, label %for.cond2.preheader, label %for.inc17 + +for.cond2.preheader: ; preds = %for.body + br i1 true, label %for.cond2.preheader.if.end16.loopexit.split_crit_edge, label %for.body4.preheader + +for.body4.preheader: ; preds = %for.cond2.preheader + br label %for.body4 + +for.cond2.preheader.if.end16.loopexit.split_crit_edge: ; preds = %for.cond2.preheader.1, %for.cond2.preheader + %storemerge20.lcssa = phi i32 [ 0, %for.cond2.preheader ], [ 1, %for.cond2.preheader.1 ] + %div.lcssa = phi i32 [ 0, %for.cond2.preheader ], [ %div.1, %for.cond2.preheader.1 ] + store i32 %storemerge20.lcssa, i32* @f, align 4 + store i32 %div.lcssa, i32* @g, align 4 + br label %if.end16 + +for.body4: ; preds = %for.body4.preheader + %add = add nsw i32 %i3, 6 + %mul = shl nsw i32 %i3, 1 + %add7 = add nsw i32 %add, %mul + %idxprom = sext i32 %add7 to i64 + %c = getelementptr inbounds [60 x %struct.b], [60 x %struct.b]* %i, i64 0, i64 %idxprom, i32 0 + store i32 0, i32* %c, align 4 + br i1 true, label %if.end10, label %for.inc + +if.end10: ; preds = %for.inc.134, %for.body4.1, %for.inc, %for.body4 + %storemerge20.lcssa27 = phi i32 [ 0, %for.body4 ], [ 0, %for.inc ], [ 1, %for.body4.1 ], [ 1, %for.inc.134 ] + %div.lcssa26 = phi i32 [ 0, %for.body4 ], [ 0, %for.inc ], [ %div.1, %for.body4.1 ], [ %div.1, %for.inc.134 ] + %mul.lcssa = phi i32 [ %mul, %for.body4 ], [ %mul.1, %for.inc ], [ %mul.130, %for.body4.1 ], [ %mul.1.1, %for.inc.134 ] + store i32 %storemerge20.lcssa27, i32* @f, align 4 + store i32 %div.lcssa26, i32* @g, align 4 + %idxprom11 = sext i32 %i3 to i64 + %idxprom14 = sext i32 %mul.lcssa to i64 + %i5 = getelementptr inbounds [60 x %struct.b], [60 x %struct.b]* %i, i64 0, i64 %idxprom14, i32 0 + %i6 = getelementptr inbounds [60 x %struct.b], [60 x %struct.b]* %i, i64 0, i64 %idxprom11, i32 0 + %i7 = load i32, i32* %i5, align 4 + store i32 %i7, i32* %i6, align 4 + br label %if.end16 + +if.end16: ; preds = %if.end10, %for.cond2.preheader.if.end16.loopexit.split_crit_edge + %i8 = load i32, i32* @d, align 4 + call void @llvm.lifetime.end.p0i8(i64 240, i8* nonnull %i1) + ret i32 %i8 + +for.inc: ; preds = %for.body4 + store i32 1, i32* @e, align 4 + %add.1 = add nsw i32 %i3, 6 + %mul.1 = shl nsw i32 %i3, 1 + %add7.1 = add nsw i32 %add.1, %mul.1 + %idxprom.1 = sext i32 %add7.1 to i64 + %c.1 = getelementptr inbounds [60 x %struct.b], [60 x %struct.b]* %i, i64 0, i64 %idxprom.1, i32 0 + store i32 0, i32* %c.1, align 4 + br i1 false, label %if.end10, label %for.inc.1 + +for.inc17: ; preds = %for.inc.1, %for.body + %i9 = load i32, i32* @e, align 4 + %tobool.1 = icmp eq i32 %i9, 0 + br i1 %tobool.1, label %for.cond2.preheader.1, label %for.inc17.1 + +for.inc.1: ; preds = %for.inc + store i32 2, i32* @e, align 4 + br label %for.inc17 + +for.cond2.preheader.1: ; preds = %for.inc17 + %div.1 = sdiv i32 1, %i2 + %tobool5.1 = icmp eq i32 %div.1, 0 + br i1 %tobool5.1, label %for.cond2.preheader.if.end16.loopexit.split_crit_edge, label %for.body4.preheader.1 + +for.body4.preheader.1: ; preds = %for.cond2.preheader.1 + br label %for.body4.1 + +for.body4.1: ; preds = %for.body4.preheader.1 + %add.129 = add nsw i32 %i3, 6 + %mul.130 = shl nsw i32 %i3, 1 + %add7.131 = add nsw i32 %add.129, %mul.130 + %idxprom.132 = sext i32 %add7.131 to i64 + %c.133 = getelementptr inbounds [60 x %struct.b], [60 x %struct.b]* %i, i64 0, i64 %idxprom.132, i32 0 + store i32 0, i32* %c.133, align 4 + br i1 false, label %if.end10, label %for.inc.134 + +for.inc.134: ; preds = %for.body4.1 + store i32 1, i32* @e, align 4 + %add.1.1 = add nsw i32 %i3, 6 + %mul.1.1 = shl nsw i32 %i3, 1 + %add7.1.1 = add nsw i32 %add.1.1, %mul.1.1 + %idxprom.1.1 = sext i32 %add7.1.1 to i64 + %c.1.1 = getelementptr inbounds [60 x %struct.b], [60 x %struct.b]* %i, i64 0, i64 %idxprom.1.1, i32 0 + store i32 0, i32* %c.1.1, align 4 + br i1 false, label %if.end10, label %for.inc.1.1 + +for.inc.1.1: ; preds = %for.inc.134 + store i32 2, i32* @e, align 4 + br label %for.inc17.1 + +for.inc17.1: ; preds = %for.inc.1.1, %for.inc17 + br label %for.cond +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)