Index: lib/Transforms/Scalar/LoopSimplifyCFG.cpp =================================================================== --- lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -87,6 +87,8 @@ DominatorTree &DT; MemorySSAUpdater *MSSAU; + // Whether or not the current loop has irreducible CFG. + bool HasIrreducibleCFG = false; // Whether or not the current loop will still exist after terminator constant // folding will be done. In theory, there are two ways how it can happen: // 1. Loop's latch(es) become unreachable from loop header; @@ -145,6 +147,26 @@ BlocksInLoopAfterFolding); } + /// Whether or not the current loop has irreducible CFG. + bool hasIrreducibleCFG(LoopBlocksDFS &DFS) { + // Index of a basic block in RPO traversal. + DenseMap RPO; + unsigned Current = 0; + for (auto I = DFS.beginRPO(), E = DFS.endRPO(); I != E; ++I) + RPO[*I] = Current++; + + for (auto I = DFS.beginRPO(), E = DFS.endRPO(); I != E; ++I) { + BasicBlock *BB = *I; + for (auto *Succ : successors(BB)) + if (L.contains(Succ) && !LI.isLoopHeader(Succ) && RPO[BB] > RPO[Succ]) + // If an edge goes from a block with greater order number into a block + // with lesses number, and it is not a loop backedge, then it can only + // be a part of irreducible non-loop cycle. + return true; + } + return false; + } + /// Fill all information about status of blocks and exits of the current loop /// if constant folding of all branches will be done. void analyze() { @@ -152,6 +174,18 @@ DFS.perform(&LI); assert(DFS.isComplete() && "DFS is expected to be finished"); + // TODO: The algorithm below relies on both RPO and Postorder traversals. + // When the loop has only reducible CFG inside, then the invariant "all + // predecessors of X are processed before X in RPO" is preserved. However + // an irreducible loop can break this invariant (e.g. latch does not have to + // be the last block in the traversal in this case, and the algorithm relies + // on this). We can later decide to support such cases by altering the + // algorithms, but so far we just give up analyzing them. + if (hasIrreducibleCFG(DFS)) { + HasIrreducibleCFG = true; + return; + } + // Collect live and dead loop blocks and exits. LiveLoopBlocks.insert(L.getHeader()); for (auto I = DFS.beginRPO(), E = DFS.endRPO(); I != E; ++I) { @@ -323,6 +357,11 @@ LLVM_DEBUG(dbgs() << "In function " << L.getHeader()->getParent()->getName() << ": "); + if (HasIrreducibleCFG) { + LLVM_DEBUG(dbgs() << "Loops with irreducible CFG are not supported!\n"); + return false; + } + // Nothing to constant-fold. if (FoldCandidates.empty()) { LLVM_DEBUG( Index: test/Transforms/LoopSimplifyCFG/irreducible_cfg.ll =================================================================== --- /dev/null +++ test/Transforms/LoopSimplifyCFG/irreducible_cfg.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; REQUIRES: asserts +; RUN: opt -S -enable-loop-simplifycfg-term-folding=true -loop-simplifycfg -debug-only=loop-simplifycfg -verify-loop-info -verify-dom-info -verify-loop-lcssa 2>&1 < %s | FileCheck %s +; RUN: opt -S -enable-loop-simplifycfg-term-folding=true -passes='require,loop(simplify-cfg)' -debug-only=loop-simplifycfg -verify-loop-info -verify-dom-info -verify-loop-lcssa 2>&1 < %s | FileCheck %s +; RUN: opt -S -enable-loop-simplifycfg-term-folding=true -loop-simplifycfg -enable-mssa-loop-dependency=true -verify-memoryssa -debug-only=loop-simplifycfg -verify-loop-info -verify-dom-info -verify-loop-lcssa 2>&1 < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" + +; This test has irreducible CFG, and RPO may be the following: +; Header, Dead, Irreducible2, Latch, Irreducible3, Irreducible1. +; As result, we will process Irreducible2 before its predecessor Irreducible1. +; The current algorithm gets confused in this case. We may support irreducible +; CFG in the future. +define void @irreducible_cfg(i1 %cond) { +; CHECK-LABEL: @irreducible_cfg( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: br i1 false, label [[DEAD:%.*]], label [[IRREDUCIBLE1:%.*]] +; CHECK: dead: +; CHECK-NEXT: br label [[IRREDUCIBLE2:%.*]] +; CHECK: irreducible2: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[LATCH:%.*]], label [[IRREDUCIBLE3:%.*]] +; CHECK: latch: +; CHECK-NEXT: br label [[HEADER]] +; CHECK: irreducible3: +; CHECK-NEXT: br label [[IRREDUCIBLE1]] +; CHECK: irreducible1: +; CHECK-NEXT: br label [[IRREDUCIBLE2]] +; +entry: + br label %header + +header: ; preds = %latch, %entry + br i1 false, label %dead, label %irreducible1 + +dead: ; preds = %header + br label %irreducible2 + +irreducible2: ; preds = %irreducible1, %dead + br i1 %cond, label %latch, label %irreducible3 + +latch: ; preds = %irreducible2 + br label %header + +irreducible3: ; preds = %irreducible2 + br label %irreducible1 + +irreducible1: ; preds = %irreducible3, %header + br label %irreducible2 +}