diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -47,6 +47,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CmpInstAnalysis.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -1961,7 +1962,8 @@ /// \endcode static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, Value *&BitMask, Value *&BitPos, - Value *&CurrX, Value *&NextX) { + Value *&CurrX, Value *&NextX, + size_t &CanonicalHeaderSize) { LLVM_DEBUG(dbgs() << DEBUG_TYPE " Performing shift-until-bittest idiom detection.\n"); @@ -1992,6 +1994,7 @@ // Step 2: Check if the backedge's condition is in desirable form. auto MatchVariableBitMask = [&]() { + CanonicalHeaderSize = 5; return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) && match(CmpLHS, m_c_And(m_Value(CurrX), @@ -2001,14 +2004,24 @@ CurLoop)))); }; auto MatchConstantBitMask = [&]() { + CanonicalHeaderSize = 5; return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) && match(CmpLHS, m_And(m_Value(CurrX), m_CombineAnd(m_Value(BitMask), m_Power2()))) && (BitPos = ConstantExpr::getExactLogBase2(cast(BitMask))); }; + auto MatchDecomposableConstantBitMask = [&]() { + CanonicalHeaderSize = 4; + + APInt Mask; + return llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CurrX, Mask) && + ICmpInst::isEquality(Pred) && Mask.isPowerOf2() && + (BitMask = ConstantInt::get(CurrX->getType(), Mask)) && + (BitPos = ConstantInt::get(CurrX->getType(), Mask.logBase2())); + }; - if (!MatchVariableBitMask() && !MatchConstantBitMask()) { - // FIXME: support sign bit test (use llvm::decomposeBitTestICmp()). + if (!MatchVariableBitMask() && !MatchConstantBitMask() && + !MatchDecomposableConstantBitMask()) { LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge comparison.\n"); return false; } @@ -2097,8 +2110,9 @@ bool MadeChange = false; Value *X, *BitMask, *BitPos, *XCurr, *XNext; - if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr, - XNext)) { + size_t CanonicalHeaderSize; + if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr, XNext, + CanonicalHeaderSize)) { LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom detection failed.\n"); return MadeChange; @@ -2118,7 +2132,6 @@ // The loop must not have any other instructions other than the idiom itself. // FIXME: we could just rewrite the loop with countable trip count. size_t HeaderSize = LoopHeaderBB->sizeWithoutDebug(); - constexpr size_t CanonicalHeaderSize = 5; assert(HeaderSize >= CanonicalHeaderSize); if (HeaderSize > CanonicalHeaderSize) { LLVM_DEBUG(dbgs() << DEBUG_TYPE " Won't be able to delete loop!\n"); diff --git a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll --- a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll @@ -451,20 +451,42 @@ } define i32 @p7_constant_mask_signbit_canonical(i32 %x) { -; ALL-LABEL: @p7_constant_mask_signbit_canonical( -; ALL-NEXT: entry: -; ALL-NEXT: br label [[LOOP:%.*]], [[DBG116:!dbg !.*]] -; ALL: loop: -; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG117:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META113:metadata !.*]], metadata !DIExpression()), [[DBG117]] -; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp sgt i32 [[X_CURR]], -1, [[DBG118:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META114:metadata !.*]], metadata !DIExpression()), [[DBG118]] -; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG119:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META115:metadata !.*]], metadata !DIExpression()), [[DBG119]] -; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG120:!dbg !.*]] -; ALL: end: -; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG117]] -; ALL-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG121:!dbg !.*]] +; LZCNT-LABEL: @p7_constant_mask_signbit_canonical( +; LZCNT-NEXT: entry: +; LZCNT-NEXT: [[X_NUMLEADINGZEROS:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true), [[DBG116:!dbg !.*]] +; LZCNT-NEXT: [[X_NUMACTIVEBITS:%.*]] = sub i32 32, [[X_NUMLEADINGZEROS]], [[DBG116]] +; LZCNT-NEXT: [[X_LEADINGONEPOS:%.*]] = add i32 [[X_NUMACTIVEBITS]], -1, [[DBG116]] +; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 31, [[X_LEADINGONEPOS]], [[DBG116]] +; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG116]] +; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG116]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG116]] +; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG117:!dbg !.*]] +; LZCNT: loop: +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG116]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META113:metadata !.*]], metadata !DIExpression()), [[DBG116]] +; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp sgt i32 [[X_CURR]], -1, [[DBG118:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META114:metadata !.*]], metadata !DIExpression()), [[DBG118]] +; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG119:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META115:metadata !.*]], metadata !DIExpression()), [[DBG119]] +; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG120:!dbg !.*]] +; LZCNT: end: +; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG116]] +; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG121:!dbg !.*]] +; +; NOLZCNT-LABEL: @p7_constant_mask_signbit_canonical( +; NOLZCNT-NEXT: entry: +; NOLZCNT-NEXT: br label [[LOOP:%.*]], [[DBG116:!dbg !.*]] +; NOLZCNT: loop: +; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG117:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META113:metadata !.*]], metadata !DIExpression()), [[DBG117]] +; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp sgt i32 [[X_CURR]], -1, [[DBG118:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META114:metadata !.*]], metadata !DIExpression()), [[DBG118]] +; NOLZCNT-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG119:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META115:metadata !.*]], metadata !DIExpression()), [[DBG119]] +; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG120:!dbg !.*]] +; NOLZCNT: end: +; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG117]] +; NOLZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG121:!dbg !.*]] ; entry: br label %loop @@ -677,20 +699,42 @@ ; ICmp-Br are commutative define i32 @p11(i32 %x) { -; ALL-LABEL: @p11( -; ALL-NEXT: entry: -; ALL-NEXT: br label [[LOOP:%.*]], [[DBG172:!dbg !.*]] -; ALL: loop: -; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG173:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META169:metadata !.*]], metadata !DIExpression()), [[DBG173]] -; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp slt i32 [[X_CURR]], 0, [[DBG174:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META170:metadata !.*]], metadata !DIExpression()), [[DBG174]] -; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG175:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META171:metadata !.*]], metadata !DIExpression()), [[DBG175]] -; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[END:%.*]], label [[LOOP]], [[DBG176:!dbg !.*]] -; ALL: end: -; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG173]] -; ALL-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG177:!dbg !.*]] +; LZCNT-LABEL: @p11( +; LZCNT-NEXT: entry: +; LZCNT-NEXT: [[X_NUMLEADINGZEROS:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true), [[DBG172:!dbg !.*]] +; LZCNT-NEXT: [[X_NUMACTIVEBITS:%.*]] = sub i32 32, [[X_NUMLEADINGZEROS]], [[DBG172]] +; LZCNT-NEXT: [[X_LEADINGONEPOS:%.*]] = add i32 [[X_NUMACTIVEBITS]], -1, [[DBG172]] +; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 31, [[X_LEADINGONEPOS]], [[DBG172]] +; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG172]] +; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG172]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG172]] +; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG173:!dbg !.*]] +; LZCNT: loop: +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG172]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META169:metadata !.*]], metadata !DIExpression()), [[DBG172]] +; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp slt i32 [[X_CURR]], 0, [[DBG174:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META170:metadata !.*]], metadata !DIExpression()), [[DBG174]] +; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG175:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META171:metadata !.*]], metadata !DIExpression()), [[DBG175]] +; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG176:!dbg !.*]] +; LZCNT: end: +; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG172]] +; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG177:!dbg !.*]] +; +; NOLZCNT-LABEL: @p11( +; NOLZCNT-NEXT: entry: +; NOLZCNT-NEXT: br label [[LOOP:%.*]], [[DBG172:!dbg !.*]] +; NOLZCNT: loop: +; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG173:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META169:metadata !.*]], metadata !DIExpression()), [[DBG173]] +; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp slt i32 [[X_CURR]], 0, [[DBG174:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META170:metadata !.*]], metadata !DIExpression()), [[DBG174]] +; NOLZCNT-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG175:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META171:metadata !.*]], metadata !DIExpression()), [[DBG175]] +; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[END:%.*]], label [[LOOP]], [[DBG176:!dbg !.*]] +; NOLZCNT: end: +; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG173]] +; NOLZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG177:!dbg !.*]] ; entry: br label %loop