Index: llvm/lib/Transforms/Scalar/GVN.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVN.cpp +++ llvm/lib/Transforms/Scalar/GVN.cpp @@ -2729,15 +2729,15 @@ // For the dead blocks' live successors, update their phi nodes by replacing // the operands corresponding to dead blocks with UndefVal. - for(SmallSetVector::iterator I = DF.begin(), E = DF.end(); - I != E; I++) { - BasicBlock *B = *I; + while (!DF.empty()) { + BasicBlock *B = DF.pop_back_val(); if (DeadBlocks.count(B)) continue; // First, split the critical edges. This might also create additional blocks // to preserve LoopSimplify form and adjust edges accordingly. SmallVector Preds(pred_begin(B), pred_end(B)); + SmallVector Candidates; for (BasicBlock *P : Preds) { if (!DeadBlocks.count(P)) continue; @@ -2745,11 +2745,67 @@ if (llvm::any_of(successors(P), [B](BasicBlock *Succ) { return Succ == B; }) && isCriticalEdge(P->getTerminator(), B)) { - if (BasicBlock *S = splitCriticalEdges(P, B)) - DeadBlocks.insert(P = S); + Candidates.push_back(P); } } + // - - - - - - - - + // | ... ... | + // |[D1] | => A simplified loop + // | | \ | + // | | [D2] [B1] | + // -|- - |- - |- - + // | | | + // \ | / + // \ | / + // [ DF1 ] => exit block for loop + // + // Consider the above scenario, there're two dead blocks [D1] + [D2], and + // a live block[B1]([D1][D2][B1] in one loop). [DF1] will a common dominance + // frontier block for [D1] and [D2], so Candidates will contain two elements + // [D1, D2]. [D1] has two succs: [D2] and [DF1], for [D1],first llvm::any_of + // will return true, so splitCriticalEdges will be called. + // + // - - - - - - - - + // | ... ... | + // |[D1] | => A simplified loop + // | | \ | + // | | [D2] [B1] | + // -|- - |- - |- - + // | | | + // | \ / + // [D3] [DF2] => New Exit block[DF2] for loop + // \ | + // [ DF1 ] + // + // After splitCriticalEdges, a new crit_edge block[D3] will be created, and + // it is added into dead blocks. Also, a new loop exit block[DF2] is created + // to preserve LoopSimplify. After that, llvm::any_of will return false for + // [D2], and we need to add [DF2] as a new dominance frontier block for [D2] + // since it has a pred edge to non-dead block[B1], and then, we need to + // update the undef incoming value for phi in [DF2]. + for (BasicBlock *C : Candidates) { + if (!llvm::any_of(successors(C), + [B](BasicBlock *Succ) { return Succ == B; })) { + std::set Succs; + std::copy_if(succ_begin(C), succ_end(C), + std::inserter(Succs, Succs.begin()), + [&](BasicBlock *Succ) { return !DeadBlocks.count(Succ); }); + std::set Preds; + std::copy_if(pred_begin(B), pred_end(B), + std::inserter(Preds, Preds.begin()), + [&](BasicBlock *Pred) { return !DeadBlocks.count(Pred); }); + std::vector NewBlocks; + std::set_intersection(Succs.begin(), Succs.end(), Preds.begin(), + Preds.end(), + std::inserter(NewBlocks, NewBlocks.begin())); + assert(NewBlocks.size() == 1 && + "Invalid number of blocks between candidate and successor."); + DF.insert(NewBlocks.back()); + } else if (BasicBlock *S = splitCriticalEdges(C, B)) + DeadBlocks.insert(C = S); + } + // Now undef the incoming values from the dead predecessors. for (BasicBlock *P : predecessors(B)) { if (!DeadBlocks.count(P)) Index: llvm/test/Transforms/GVN/pr47462.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GVN/pr47462.ll @@ -0,0 +1,64 @@ +; RUN: opt -S -gvn -enable-load-pre < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@global_a = common dso_local local_unnamed_addr global i32 0, align 4 +@global_b = common dso_local local_unnamed_addr global i32 0, align 4 +@global_c = common dso_local local_unnamed_addr global i32* null, align 8 + +; Should have no load in for.end. +; CHECK-LABEL: @test +; CHECK: for.end.loopexitsplitsplit: +; CHECK: for.end: +; CHECK-NOT: load + +define dso_local i32 @test() { +entry: + %local_array = alloca [3 x i32], align 4 + store i32 0, i32* @global_a, align 4 + %i = load i32, i32* @global_b, align 4 + %toboo1 = icmp eq i32 %i, 0 + br i1 %toboo1, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + %i1 = load i32*, i32** @global_c, align 8 + br label %for.body + +for.body: ; preds = %for.inc, %for.body.lr.ph + %g.0 = phi i32* [ @global_a, %for.body.lr.ph ], [ %arrayidx.le, %for.inc ] + %i2 = load i32, i32* %i1, align 4 + %toboo2 = icmp eq i32 %i2, 0 + br i1 %toboo2, label %if.end, label %for.end.loopexit + +if.end: ; preds = %for.body + %conv = trunc i64 0 to i32 + store i32 %conv, i32* %g.0, align 4 + br i1 true, label %cleanup, label %for.cond.preheader + +for.cond.preheader: ; preds = %if.end + %idxprom.le = zext i32 2 to i64 + %arrayidx.le = getelementptr inbounds [3 x i32], [3 x i32]* %local_array, i64 0, i64 %idxprom.le + %toboo3 = icmp eq i64 0, 0 + br i1 %toboo3, label %for.inc, label %for.end.loopexit + +for.inc: ; preds = %for.cond.preheader + %i3 = load i32, i32* @global_b, align 4 + %dec = add nsw i32 %i3, -1 + store i32 %dec, i32* @global_b, align 4 + %toboo4 = icmp eq i32 %dec, 0 + br i1 %toboo4, label %for.end.loopexit, label %for.body + +for.end.loopexit: ; preds = %for.inc, %for.cond.preheader, %for.body + %g.1 = phi i32* [ %arrayidx.le, %for.inc ], [ %g.0, %for.body ], [ %arrayidx.le, %for.cond.preheader ] + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + %g.2 = phi i32* [ @global_a, %entry ], [ %g.1, %for.end.loopexit ] + %i4 = load i32, i32* %g.2, align 4 + store i32 %i4, i32* @global_a, align 4 + br label %cleanup + +cleanup: ; preds = %for.end, %if.end + ret i32 0 +}