diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -47,6 +47,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CmpInstAnalysis.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -2004,7 +2005,8 @@ /// \endcode static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, Value *&BitMask, Value *&BitPos, - Value *&CurrX) { + Value *&CurrX, + size_t &CanonicalHeaderSize) { LLVM_DEBUG(dbgs() << DEBUG_TYPE " Performing shift-until-bittest idiom detection.\n"); @@ -2037,6 +2039,7 @@ // Step 2: Check if the backedge's condition is in desirable form. auto MatchVariableBitMask = [&]() { + CanonicalHeaderSize = 5; return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) && match(CmpLHS, m_c_And(m_Value(Rec), @@ -2046,14 +2049,24 @@ CurLoop)))); }; auto MatchConstantBitMask = [&]() { + CanonicalHeaderSize = 5; return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) && match(CmpLHS, m_c_And(m_Value(Rec), m_CombineAnd(m_Value(BitMask), m_Power2()))) && (BitPos = ConstantExpr::getExactLogBase2(cast(BitMask))); }; + auto MatchDecomposableConstantBitMask = [&]() { + CanonicalHeaderSize = 4; - if (!MatchVariableBitMask() && !MatchConstantBitMask()) { - // FIXME: support sign bit test (use llvm::decomposeBitTestICmp()). + APInt Mask; + return llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, Rec, Mask) && + ICmpInst::isEquality(Pred) && Mask.isPowerOf2() && + (BitMask = ConstantInt::get(Rec->getType(), Mask)) && + (BitPos = ConstantInt::get(Rec->getType(), Mask.logBase2())); + }; + + if (!MatchVariableBitMask() && !MatchConstantBitMask() && + !MatchDecomposableConstantBitMask()) { LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge comparison.\n"); return false; } @@ -2134,7 +2147,9 @@ bool MadeChange = false; Value *X, *BitMask, *BitPos, *XCurr; - if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr)) { + size_t CanonicalHeaderSize; + if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr, + CanonicalHeaderSize)) { LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom detection failed.\n"); return MadeChange; @@ -2154,7 +2169,6 @@ // The loop must not have any other instructions other than the idiom itself. // FIXME: we could just rewrite the loop with countable trip count. size_t HeaderSize = LoopHeaderBB->sizeWithoutDebug(); - constexpr size_t CanonicalHeaderSize = 5; assert(HeaderSize >= CanonicalHeaderSize); if (HeaderSize > CanonicalHeaderSize) { LLVM_DEBUG(dbgs() << DEBUG_TYPE " Won't be able to delete loop!\n"); diff --git a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll --- a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll @@ -314,21 +314,42 @@ } define i32 @p5_constant_mask_signbit_canonical(i32 %x) { -; ALL-LABEL: @p5_constant_mask_signbit_canonical( -; ALL-NEXT: entry: -; ALL-NEXT: br label [[LOOP:%.*]], [[DBG84:!dbg !.*]] -; ALL: loop: -; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG85:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META81:metadata !.*]], metadata !DIExpression()), [[DBG85]] -; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp sgt i32 [[X_CURR]], -1, [[DBG86:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META82:metadata !.*]], metadata !DIExpression()), [[DBG86]] -; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG87:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META83:metadata !.*]], metadata !DIExpression()), [[DBG87]] -; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG88:!dbg !.*]] -; ALL: end: -; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG85]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META81]], metadata !DIExpression()), [[DBG85]] -; ALL-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG89:!dbg !.*]] +; LZCNT-LABEL: @p5_constant_mask_signbit_canonical( +; LZCNT-NEXT: entry: +; LZCNT-NEXT: [[X_NUMLEADINGZEROS:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true), [[DBG84:!dbg !.*]] +; LZCNT-NEXT: [[X_NUMACTIVEBITS:%.*]] = sub i32 32, [[X_NUMLEADINGZEROS]], [[DBG84]] +; LZCNT-NEXT: [[X_NUMLEADINGONEPOS:%.*]] = add i32 [[X_NUMACTIVEBITS]], -1, [[DBG84]] +; LZCNT-NEXT: [[X_LOOPTRIPCOUNT:%.*]] = sub i32 31, [[X_NUMLEADINGONEPOS]], [[DBG84]] +; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[X_LOOPTRIPCOUNT]], [[DBG84]] +; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG85:!dbg !.*]] +; LZCNT: loop: +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG84]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META81:metadata !.*]], metadata !DIExpression()), [[DBG84]] +; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp sgt i32 [[X_CURR]], -1, [[DBG86:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META82:metadata !.*]], metadata !DIExpression()), [[DBG86]] +; LZCNT-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG87:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META83:metadata !.*]], metadata !DIExpression()), [[DBG87]] +; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG88:!dbg !.*]] +; LZCNT: end: +; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG84]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META81]], metadata !DIExpression()), [[DBG84]] +; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG89:!dbg !.*]] +; +; NOLZCNT-LABEL: @p5_constant_mask_signbit_canonical( +; NOLZCNT-NEXT: entry: +; NOLZCNT-NEXT: br label [[LOOP:%.*]], [[DBG84:!dbg !.*]] +; NOLZCNT: loop: +; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG85:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META81:metadata !.*]], metadata !DIExpression()), [[DBG85]] +; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp sgt i32 [[X_CURR]], -1, [[DBG86:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META82:metadata !.*]], metadata !DIExpression()), [[DBG86]] +; NOLZCNT-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG87:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META83:metadata !.*]], metadata !DIExpression()), [[DBG87]] +; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG88:!dbg !.*]] +; NOLZCNT: end: +; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG85]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META81]], metadata !DIExpression()), [[DBG85]] +; NOLZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG89:!dbg !.*]] ; entry: br label %loop @@ -541,21 +562,42 @@ ; ICmp-Br are commutative define i32 @p9(i32 %x) { -; ALL-LABEL: @p9( -; ALL-NEXT: entry: -; ALL-NEXT: br label [[LOOP:%.*]], [[DBG140:!dbg !.*]] -; ALL: loop: -; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG141:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META137:metadata !.*]], metadata !DIExpression()), [[DBG141]] -; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp slt i32 [[X_CURR]], 0, [[DBG142:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META138:metadata !.*]], metadata !DIExpression()), [[DBG142]] -; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG143:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META139:metadata !.*]], metadata !DIExpression()), [[DBG143]] -; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[END:%.*]], label [[LOOP]], [[DBG144:!dbg !.*]] -; ALL: end: -; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG141]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META137]], metadata !DIExpression()), [[DBG141]] -; ALL-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG145:!dbg !.*]] +; LZCNT-LABEL: @p9( +; LZCNT-NEXT: entry: +; LZCNT-NEXT: [[X_NUMLEADINGZEROS:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true), [[DBG140:!dbg !.*]] +; LZCNT-NEXT: [[X_NUMACTIVEBITS:%.*]] = sub i32 32, [[X_NUMLEADINGZEROS]], [[DBG140]] +; LZCNT-NEXT: [[X_NUMLEADINGONEPOS:%.*]] = add i32 [[X_NUMACTIVEBITS]], -1, [[DBG140]] +; LZCNT-NEXT: [[X_LOOPTRIPCOUNT:%.*]] = sub i32 31, [[X_NUMLEADINGONEPOS]], [[DBG140]] +; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[X_LOOPTRIPCOUNT]], [[DBG140]] +; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG141:!dbg !.*]] +; LZCNT: loop: +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG140]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META137:metadata !.*]], metadata !DIExpression()), [[DBG140]] +; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp slt i32 [[X_CURR]], 0, [[DBG142:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META138:metadata !.*]], metadata !DIExpression()), [[DBG142]] +; LZCNT-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG143:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META139:metadata !.*]], metadata !DIExpression()), [[DBG143]] +; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG144:!dbg !.*]] +; LZCNT: end: +; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG140]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META137]], metadata !DIExpression()), [[DBG140]] +; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG145:!dbg !.*]] +; +; NOLZCNT-LABEL: @p9( +; NOLZCNT-NEXT: entry: +; NOLZCNT-NEXT: br label [[LOOP:%.*]], [[DBG140:!dbg !.*]] +; NOLZCNT: loop: +; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG141:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META137:metadata !.*]], metadata !DIExpression()), [[DBG141]] +; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp slt i32 [[X_CURR]], 0, [[DBG142:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META138:metadata !.*]], metadata !DIExpression()), [[DBG142]] +; NOLZCNT-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG143:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META139:metadata !.*]], metadata !DIExpression()), [[DBG143]] +; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[END:%.*]], label [[LOOP]], [[DBG144:!dbg !.*]] +; NOLZCNT: end: +; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG141]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META137]], metadata !DIExpression()), [[DBG141]] +; NOLZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG145:!dbg !.*]] ; entry: br label %loop