Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -50,6 +50,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -220,6 +221,60 @@ class LoopVectorizationCostModel; class LoopVectorizationRequirements; +// A traits type that is intended to be used in graph algorithms. The graph it +// models starts at the loop header, and traverses the BasicBlocks that are in +// the loop body, but not the loop header. +struct LoopBodyTraits { + using NodeRef = std::pair; + + class WrappedSuccIterator + : public iterator_adaptor_base< + WrappedSuccIterator, succ_iterator, + typename std::iterator_traits::iterator_category, + NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> { + using BaseT = iterator_adaptor_base< + WrappedSuccIterator, succ_iterator, + typename std::iterator_traits::iterator_category, + NodeRef, std::ptrdiff_t, NodeRef *, NodeRef>; + + const Loop *L; + + public: + WrappedSuccIterator(succ_iterator Begin, const Loop *L) + : BaseT(Begin), L(L) {} + + NodeRef operator*() const { return {this->L, *this->I}; } + }; + + struct LoopBodyFilter { + bool operator()(NodeRef N) const { + const Loop *L = N.first; + return N.second != L->getHeader() && L->contains(N.second); + } + }; + + using ChildIteratorType = + filter_iterator; + + static NodeRef getEntryNode(const Loop &G) { return {&G, G.getHeader()}; } + + static ChildIteratorType child_begin(NodeRef Node) { + return make_filter_range(make_range( + {succ_begin(Node.second), Node.first}, + {succ_end(Node.second), Node.first}), + LoopBodyFilter{}) + .begin(); + } + + static ChildIteratorType child_end(NodeRef Node) { + return make_filter_range(make_range( + {succ_begin(Node.second), Node.first}, + {succ_end(Node.second), Node.first}), + LoopBodyFilter{}) + .end(); + } +}; + /// \brief This modifies LoopAccessReport to initialize message with /// loop-vectorizer-specific part. class VectorizationReport : public LoopAccessReport { @@ -1481,6 +1536,24 @@ return TTI->isLegalMaskedGather(DataType); } + /// Returns true if the given loop body has a cycle, excluding the loop + /// itself. + bool hasCyclesInLoopBody() const { + if (!TheLoop->empty()) + return true; + + for (const auto SCC : + make_range(scc_iterator::begin(*TheLoop), + scc_iterator::end(*TheLoop))) { + if (SCC.size() > 1) { + DEBUG(dbgs() << "LVL: Detected a cycle in the loop body:\n"); + DEBUG(TheLoop->dump()); + return true; + } + } + return false; + } + /// Returns true if vector representation of the instruction \p I /// requires mask. bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); } @@ -4479,8 +4552,8 @@ } // We can only vectorize innermost loops. - if (!TheLoop->empty()) { - emitAnalysis(VectorizationReport() << "loop is not the innermost loop"); + if (hasCyclesInLoopBody()) { + emitAnalysis(VectorizationReport() << "the loop body has a cycle"); return false; } Index: test/Transforms/LoopVectorize/pr28541.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/pr28541.ll @@ -0,0 +1,75 @@ +; RUN: opt -O2 -S < %s + +; Check that opt does not crash on such input: +; +; a, b, c; +; fn1() { +; while (b--) { +; c = a; +; switch (a & 3) +; case 0: +; do +; case 3: +; case 2: +; case 1: +; ; +; while (--c) +; ; +; } +; } + +@b = common global i32 0, align 4 +@a = common global i32 0, align 4 +@c = common global i32 0, align 4 + +; CHECK-LABEL: @fn1 +define i32 @fn1() { +entry: + %retval = alloca i32, align 4 + br label %while.cond + +while.cond: ; preds = %sw.epilog, %entry + %tmp = load i32, i32* @b, align 4 + %dec = add nsw i32 %tmp, -1 + store i32 %dec, i32* @b, align 4 + %tobool = icmp ne i32 %tmp, 0 + br i1 %tobool, label %while.body, label %while.end + +while.body: ; preds = %while.cond + %tmp1 = load i32, i32* @a, align 4 + store i32 %tmp1, i32* @c, align 4 + %tmp2 = load i32, i32* @a, align 4 + %and = and i32 %tmp2, 3 + switch i32 %and, label %sw.epilog [ + i32 0, label %sw.bb + i32 3, label %sw.bb1 + i32 2, label %sw.bb1 + i32 1, label %sw.bb1 + ] + +sw.bb: ; preds = %while.body + br label %do.body + +do.body: ; preds = %do.cond, %sw.bb + br label %sw.bb1 + +sw.bb1: ; preds = %do.body, %while.body, %while.body, %while.body + br label %do.cond + +do.cond: ; preds = %sw.bb1 + %tmp3 = load i32, i32* @c, align 4 + %dec2 = add nsw i32 %tmp3, -1 + store i32 %dec2, i32* @c, align 4 + %tobool3 = icmp ne i32 %dec2, 0 + br i1 %tobool3, label %do.body, label %do.end + +do.end: ; preds = %do.cond + br label %sw.epilog + +sw.epilog: ; preds = %do.end, %while.body + br label %while.cond + +while.end: ; preds = %while.cond + %tmp4 = load i32, i32* %retval, align 4 + ret i32 %tmp4 +}