diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1475,6 +1475,7 @@ return false; // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1 + // or cnt.next = cnt + -1. // TODO: We can skip the step. If loop trip count is known (CTLZ), // then all uses of "cnt.next" could be optimized to the trip count // plus "cnt0". Currently it is not optimized. @@ -1488,7 +1489,7 @@ continue; ConstantInt *Inc = dyn_cast(Inst->getOperand(1)); - if (!Inc || !Inc->isOne()) + if (!Inc || (!Inc->isOne() && !Inc->isMinusOne())) continue; PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry); @@ -1751,11 +1752,18 @@ NewCount = Builder.CreateZExtOrTrunc(NewCount, cast(CntInst->getType())); - // If the counter's initial value is not zero, insert Add Inst. Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader); - ConstantInt *InitConst = dyn_cast(CntInitVal); - if (!InitConst || !InitConst->isZero()) - NewCount = Builder.CreateAdd(NewCount, CntInitVal); + if (cast(CntInst->getOperand(1))->isOne()) { + // If the counter was being incremented in the loop, add NewCount to the + // counter's initial value, but only if the initial value is not zero. + ConstantInt *InitConst = dyn_cast(CntInitVal); + if (!InitConst || !InitConst->isZero()) + NewCount = Builder.CreateAdd(NewCount, CntInitVal); + } else { + // If the count was being decremented in the loop, subract NewCount from the + // counter's initial value. + NewCount = Builder.CreateSub(CntInitVal, NewCount); + } // Step 2: Insert new IV and loop condition: // loop: diff --git a/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll b/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll --- a/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll @@ -526,3 +526,89 @@ while.end: ; preds = %while.cond ret i32 %inc } + +; Recognize CTLZ builtin pattern. +; Here it will replace the loop - +; assume builtin is always profitable. +; +; int ctlz_decrement(int n) +; { +; int i = 32; +; while(n) { +; n >>= 1; +; i--; +; } +; return i; +; } +; +; ALL: entry +; ALL: %0 = call i32 @llvm.ctlz.i32(i32 %n, i1 true) +; ALL-NEXT: %1 = sub i32 32, %0 +; ALL-NEXT: %2 = sub i32 32, %1 +; ALL: %inc.lcssa = phi i32 [ %2, %while.body ] +; ALL: %i.0.lcssa = phi i32 [ 32, %entry ], [ %inc.lcssa, %while.end.loopexit ] +; ALL: ret i32 %i.0.lcssa + +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @ctlz_decrement(i32 %n) { +entry: + %tobool4 = icmp eq i32 %n, 0 + br i1 %tobool4, label %while.end, label %while.body.preheader + +while.body.preheader: ; preds = %entry + br label %while.body + +while.body: ; preds = %while.body.preheader, %while.body + %i.06 = phi i32 [ %inc, %while.body ], [ 32, %while.body.preheader ] + %n.addr.05 = phi i32 [ %shr, %while.body ], [ %n, %while.body.preheader ] + %shr = lshr i32 %n.addr.05, 1 + %inc = add nsw i32 %i.06, -1 + %tobool = icmp eq i32 %shr, 0 + br i1 %tobool, label %while.end.loopexit, label %while.body + +while.end.loopexit: ; preds = %while.body + br label %while.end + +while.end: ; preds = %while.end.loopexit, %entry + %i.0.lcssa = phi i32 [ 32, %entry ], [ %inc, %while.end.loopexit ] + ret i32 %i.0.lcssa +} + +; Recognize CTLZ builtin pattern. +; Here it will replace the loop - +; assume builtin is always profitable. +; +; int ctlz_lshr_decrement(int n) +; { +; int i = 31; +; while(n >>= 1) { +; i--; +; } +; return i; +; } +; +; ALL: entry +; ALL: %0 = lshr i32 %n, 1 +; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) +; ALL-NEXT: %2 = sub i32 32, %1 +; ALL-NEXT: %3 = add i32 %2, 1 +; ALL-NEXT: %4 = sub i32 31, %2 +; ALL: %i.0.lcssa = phi i32 [ %4, %while.cond ] +; ALL: ret i32 %i.0.lcssa + +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @ctlz_lshr_decrement(i32 %n) { +entry: + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ] + %i.0 = phi i32 [ 31, %entry ], [ %inc, %while.cond ] + %shr = lshr i32 %n.addr.0, 1 + %tobool = icmp eq i32 %shr, 0 + %inc = add nsw i32 %i.0, -1 + br i1 %tobool, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + ret i32 %i.0 +} diff --git a/llvm/test/Transforms/LoopIdiom/X86/cttz.ll b/llvm/test/Transforms/LoopIdiom/X86/cttz.ll --- a/llvm/test/Transforms/LoopIdiom/X86/cttz.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/cttz.ll @@ -80,3 +80,88 @@ ret i32 %i.0 } +; Recognize CTTZ builtin pattern. +; Here it will replace the loop - +; assume builtin is always profitable. +; +; int ctlz_decrement(int n) +; { +; int i = 32; +; while(n) { +; n <<= 1; +; i--; +; } +; return i; +; } +; +; ALL: entry +; ALL: %0 = call i32 @llvm.cttz.i32(i32 %n, i1 true) +; ALL-NEXT: %1 = sub i32 32, %0 +; ALL-NEXT: %2 = sub i32 32, %1 +; ALL: %inc.lcssa = phi i32 [ %2, %while.body ] +; ALL: %i.0.lcssa = phi i32 [ 32, %entry ], [ %inc.lcssa, %while.end.loopexit ] +; ALL: ret i32 %i.0.lcssa + +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @cttz_decrement(i32 %n) { +entry: + %tobool4 = icmp eq i32 %n, 0 + br i1 %tobool4, label %while.end, label %while.body.preheader + +while.body.preheader: ; preds = %entry + br label %while.body + +while.body: ; preds = %while.body.preheader, %while.body + %i.06 = phi i32 [ %inc, %while.body ], [ 32, %while.body.preheader ] + %n.addr.05 = phi i32 [ %shl, %while.body ], [ %n, %while.body.preheader ] + %shl = shl i32 %n.addr.05, 1 + %inc = add nsw i32 %i.06, -1 + %tobool = icmp eq i32 %shl, 0 + br i1 %tobool, label %while.end.loopexit, label %while.body + +while.end.loopexit: ; preds = %while.body + br label %while.end + +while.end: ; preds = %while.end.loopexit, %entry + %i.0.lcssa = phi i32 [ 32, %entry ], [ %inc, %while.end.loopexit ] + ret i32 %i.0.lcssa +} + +; Recognize CTTZ builtin pattern. +; Here it will replace the loop - +; assume builtin is always profitable. +; +; int cttz_shl_decrement(int n) +; { +; int i = 31; +; while(n <<= 1) { +; i--; +; } +; return i; +; } +; +; ALL: entry +; ALL: %0 = shl i32 %n, 1 +; ALL-NEXT: %1 = call i32 @llvm.cttz.i32(i32 %0, i1 false) +; ALL-NEXT: %2 = sub i32 32, %1 +; ALL-NEXT: %3 = add i32 %2, 1 +; ALL-NEXT: %4 = sub i32 31, %2 +; ALL: %i.0.lcssa = phi i32 [ %4, %while.cond ] +; ALL: ret i32 %i.0.lcssa + +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @cttz_shl_decrement(i32 %n) { +entry: + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %n.addr.0 = phi i32 [ %n, %entry ], [ %shl, %while.cond ] + %i.0 = phi i32 [ 31, %entry ], [ %inc, %while.cond ] + %shl = shl i32 %n.addr.0, 1 + %tobool = icmp eq i32 %shl, 0 + %inc = add nsw i32 %i.0, -1 + br i1 %tobool, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + ret i32 %i.0 +}