Index: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1316,6 +1316,7 @@ return false; // step 2: detect instructions corresponding to "x.next = x >> 1" + // TODO: Support loops that use LShr. if (!DefX || DefX->getOpcode() != Instruction::AShr) return false; ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1)); @@ -1397,6 +1398,13 @@ // parent function RunOnLoop. BasicBlock *PH = CurLoop->getLoopPreheader(); Value *InitX = PhiX->getIncomingValueForBlock(PH); + + // Make sure the initial value can't be negative otherwise the ashr in the + // loop might never reach zero which would make the loop infinite. + // TODO: Support loops that use lshr and wouldn't need this check. + if (!isKnownNonNegative(InitX, *DL)) + return false; + // If we check X != 0 before entering the loop we don't need a zero // check in CTLZ intrinsic, but only if Cnt Phi is not used outside of the // loop (if it is used we count CTLZ(X >> 1)). Index: llvm/trunk/test/Transforms/LoopIdiom/ARM/ctlz.ll =================================================================== --- llvm/trunk/test/Transforms/LoopIdiom/ARM/ctlz.ll +++ llvm/trunk/test/Transforms/LoopIdiom/ARM/ctlz.ll @@ -7,6 +7,7 @@ ; ; int ctlz_and_other(int n, char *a) ; { +; n = n >= 0 ? n : -n; ; int i = 0, n0 = n; ; while(n >>= 1) { ; a[i] = (n0 & (1 << i)) ?
1 : 0; @@ -30,7 +31,10 @@ ; Function Attrs: norecurse nounwind uwtable define i32 @ctlz_and_other(i32 %n, i8* nocapture %a) { entry: - %shr8 = ashr i32 %n, 1 + %c = icmp sgt i32 %n, 0 + %negn = sub nsw i32 0, %n + %abs_n = select i1 %c, i32 %n, i32 %negn + %shr8 = lshr i32 %abs_n, 1 %tobool9 = icmp eq i32 %shr8, 0 br i1 %tobool9, label %while.end, label %while.body.preheader @@ -42,7 +46,7 @@ %shr11 = phi i32 [ %shr, %while.body ], [ %shr8, %while.body.preheader ] %0 = trunc i64 %indvars.iv to i32 %shl = shl i32 1, %0 - %and = and i32 %shl, %n + %and = and i32 %shl, %abs_n %tobool1 = icmp ne i32 %and, 0 %conv = zext i1 %tobool1 to i8 %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv @@ -67,6 +71,7 @@ ; ; int ctlz_zero_check(int n) ; { +; n = n >= 0 ? n : -n; ; int i = 0; ; while(n) { ; n >>= 1; @@ -76,7 +81,7 @@ ; } ; ; ALL: entry -; ALL: %0 = call i32 @llvm.ctlz.i32(i32 %n, i1 true) +; ALL: %0 = call i32 @llvm.ctlz.i32(i32 %abs_n, i1 true) ; ALL-NEXT: %1 = sub i32 32, %0 ; ALL: %inc.lcssa = phi i32 [ %1, %while.body ] ; ALL: %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ] @@ -85,7 +90,10 @@ ; Function Attrs: norecurse nounwind readnone uwtable define i32 @ctlz_zero_check(i32 %n) { entry: - %tobool4 = icmp eq i32 %n, 0 + %c = icmp sgt i32 %n, 0 + %negn = sub nsw i32 0, %n + %abs_n = select i1 %c, i32 %n, i32 %negn + %tobool4 = icmp eq i32 %abs_n, 0 br i1 %tobool4, label %while.end, label %while.body.preheader while.body.preheader: ; preds = %entry @@ -93,7 +101,7 @@ while.body: ; preds = %while.body.preheader, %while.body %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ] - %n.addr.05 = phi i32 [ %shr, %while.body ], [ %n, %while.body.preheader ] + %n.addr.05 = phi i32 [ %shr, %while.body ], [ %abs_n, %while.body.preheader ] %shr = ashr i32 %n.addr.05, 1 %inc = add nsw i32 %i.06, 1 %tobool = icmp eq i32 %shr, 0 @@ -113,6 +121,7 @@ ; ; int ctlz(int n) ; { +; n = n >= 0 ? 
n : -n; ; int i = 0; ; while(n >>= 1) { ; i++; @@ -121,7 +130,7 @@ ; } ; ; ALL: entry -; ALL: %0 = ashr i32 %n, 1 +; ALL: %0 = ashr i32 %abs_n, 1 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) ; ALL-NEXT: %2 = sub i32 32, %1 ; ALL-NEXT: %3 = add i32 %2, 1 @@ -131,10 +140,13 @@ ; Function Attrs: norecurse nounwind readnone uwtable define i32 @ctlz(i32 %n) { entry: + %c = icmp sgt i32 %n, 0 + %negn = sub nsw i32 0, %n + %abs_n = select i1 %c, i32 %n, i32 %negn br label %while.cond while.cond: ; preds = %while.cond, %entry - %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ] + %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ] %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ] %shr = ashr i32 %n.addr.0, 1 %tobool = icmp eq i32 %shr, 0 @@ -151,6 +163,7 @@ ; ; int ctlz_add(int n, int i0) ; { +; n = n >= 0 ? n : -n; ; int i = i0; ; while(n >>= 1) { ; i++; @@ -159,7 +172,7 @@ ; } ; ; ALL: entry -; ALL: %0 = ashr i32 %n, 1 +; ALL: %0 = ashr i32 %abs_n, 1 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) ; ALL-NEXT: %2 = sub i32 32, %1 ; ALL-NEXT: %3 = add i32 %2, 1 @@ -170,10 +183,13 @@ ; Function Attrs: norecurse nounwind readnone uwtable define i32 @ctlz_add(i32 %n, i32 %i0) { entry: + %c = icmp sgt i32 %n, 0 + %negn = sub nsw i32 0, %n + %abs_n = select i1 %c, i32 %n, i32 %negn br label %while.cond while.cond: ; preds = %while.cond, %entry - %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ] + %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ] %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ] %shr = ashr i32 %n.addr.0, 1 %tobool = icmp eq i32 %shr, 0 Index: llvm/trunk/test/Transforms/LoopIdiom/X86/ctlz.ll =================================================================== --- llvm/trunk/test/Transforms/LoopIdiom/X86/ctlz.ll +++ llvm/trunk/test/Transforms/LoopIdiom/X86/ctlz.ll @@ -7,6 +7,7 @@ ; ; int ctlz_and_other(int n, char *a) ; { +; n = n >= 0 ? 
n : -n; ; int i = 0, n0 = n; ; while(n >>= 1) { ; a[i] = (n0 & (1 << i)) ? 1 : 0; @@ -30,7 +31,10 @@ ; Function Attrs: norecurse nounwind uwtable define i32 @ctlz_and_other(i32 %n, i8* nocapture %a) { entry: - %shr8 = ashr i32 %n, 1 + %c = icmp sgt i32 %n, 0 + %negn = sub nsw i32 0, %n + %abs_n = select i1 %c, i32 %n, i32 %negn + %shr8 = lshr i32 %abs_n, 1 %tobool9 = icmp eq i32 %shr8, 0 br i1 %tobool9, label %while.end, label %while.body.preheader @@ -42,7 +46,7 @@ %shr11 = phi i32 [ %shr, %while.body ], [ %shr8, %while.body.preheader ] %0 = trunc i64 %indvars.iv to i32 %shl = shl i32 1, %0 - %and = and i32 %shl, %n + %and = and i32 %shl, %abs_n %tobool1 = icmp ne i32 %and, 0 %conv = zext i1 %tobool1 to i8 %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv @@ -67,6 +71,7 @@ ; ; int ctlz_zero_check(int n) ; { +; n = n >= 0 ? n : -n; ; int i = 0; ; while(n) { ; n >>= 1; @@ -76,7 +81,7 @@ ; } ; ; ALL: entry -; ALL: %0 = call i32 @llvm.ctlz.i32(i32 %n, i1 true) +; ALL: %0 = call i32 @llvm.ctlz.i32(i32 %abs_n, i1 true) ; ALL-NEXT: %1 = sub i32 32, %0 ; ALL: %inc.lcssa = phi i32 [ %1, %while.body ] ; ALL: %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ] @@ -85,7 +90,10 @@ ; Function Attrs: norecurse nounwind readnone uwtable define i32 @ctlz_zero_check(i32 %n) { entry: - %tobool4 = icmp eq i32 %n, 0 + %c = icmp sgt i32 %n, 0 + %negn = sub nsw i32 0, %n + %abs_n = select i1 %c, i32 %n, i32 %negn + %tobool4 = icmp eq i32 %abs_n, 0 br i1 %tobool4, label %while.end, label %while.body.preheader while.body.preheader: ; preds = %entry @@ -93,7 +101,7 @@ while.body: ; preds = %while.body.preheader, %while.body %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ] - %n.addr.05 = phi i32 [ %shr, %while.body ], [ %n, %while.body.preheader ] + %n.addr.05 = phi i32 [ %shr, %while.body ], [ %abs_n, %while.body.preheader ] %shr = ashr i32 %n.addr.05, 1 %inc = add nsw i32 %i.06, 1 %tobool = icmp eq i32 %shr, 0 @@ -113,6 +121,7 @@ ; ; int 
ctlz(int n) ; { +; n = n >= 0 ? n : -n; ; int i = 0; ; while(n >>= 1) { ; i++; @@ -121,7 +130,7 @@ ; } ; ; ALL: entry -; ALL: %0 = ashr i32 %n, 1 +; ALL: %0 = ashr i32 %abs_n, 1 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) ; ALL-NEXT: %2 = sub i32 32, %1 ; ALL-NEXT: %3 = add i32 %2, 1 @@ -131,10 +140,13 @@ ; Function Attrs: norecurse nounwind readnone uwtable define i32 @ctlz(i32 %n) { entry: + %c = icmp sgt i32 %n, 0 + %negn = sub nsw i32 0, %n + %abs_n = select i1 %c, i32 %n, i32 %negn br label %while.cond while.cond: ; preds = %while.cond, %entry - %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ] + %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ] %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ] %shr = ashr i32 %n.addr.0, 1 %tobool = icmp eq i32 %shr, 0 @@ -151,6 +163,7 @@ ; ; int ctlz_add(int n, int i0) ; { +; n = n >= 0 ? n : -n; ; int i = i0; ; while(n >>= 1) { ; i++; @@ -159,7 +172,7 @@ ; } ; ; ALL: entry -; ALL: %0 = ashr i32 %n, 1 +; ALL: %0 = ashr i32 %abs_n, 1 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) ; ALL-NEXT: %2 = sub i32 32, %1 ; ALL-NEXT: %3 = add i32 %2, 1 @@ -170,10 +183,13 @@ ; Function Attrs: norecurse nounwind readnone uwtable define i32 @ctlz_add(i32 %n, i32 %i0) { entry: + %c = icmp sgt i32 %n, 0 + %negn = sub nsw i32 0, %n + %abs_n = select i1 %c, i32 %n, i32 %negn br label %while.cond while.cond: ; preds = %while.cond, %entry - %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ] + %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ] %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ] %shr = ashr i32 %n.addr.0, 1 %tobool = icmp eq i32 %shr, 0 @@ -189,7 +205,6 @@ ; all ones and continue doing so. This prevents the loop from terminating. If ; we convert this to a countable loop using ctlz that loop will only run 32 ; times. This is different than the infinite number of times of the original. -; FIXME: Don't transform this loop. 
define i32 @foo(i32 %x) { ; LZCNT-LABEL: @foo( ; LZCNT-NEXT: entry: @@ -197,21 +212,17 @@ ; LZCNT-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[X:%.*]], 0 ; LZCNT-NEXT: br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_LR_PH:%.*]] ; LZCNT: while.body.lr.ph: -; LZCNT-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X]], i1 true) -; LZCNT-NEXT: [[TMP1:%.*]] = sub i32 32, [[TMP0]] ; LZCNT-NEXT: br label [[WHILE_BODY:%.*]] ; LZCNT: while.body: -; LZCNT-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_LR_PH]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ] ; LZCNT-NEXT: [[CNT_06:%.*]] = phi i32 [ 0, [[WHILE_BODY_LR_PH]] ], [ [[INC:%.*]], [[WHILE_BODY]] ] ; LZCNT-NEXT: [[X_ADDR_05:%.*]] = phi i32 [ [[X]], [[WHILE_BODY_LR_PH]] ], [ [[SHR:%.*]], [[WHILE_BODY]] ] ; LZCNT-NEXT: [[SHR]] = ashr i32 [[X_ADDR_05]], 1 ; LZCNT-NEXT: [[INC]] = add i32 [[CNT_06]], 1 ; LZCNT-NEXT: store volatile i8 42, i8* [[V]], align 1 -; LZCNT-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1 -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0 +; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[SHR]], 0 ; LZCNT-NEXT: br i1 [[TOBOOL]], label [[WHILE_COND_WHILE_END_CRIT_EDGE:%.*]], label [[WHILE_BODY]] ; LZCNT: while.cond.while.end_crit_edge: -; LZCNT-NEXT: [[SPLIT:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY]] ] +; LZCNT-NEXT: [[SPLIT:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ] ; LZCNT-NEXT: br label [[WHILE_END]] ; LZCNT: while.end: ; LZCNT-NEXT: [[CNT_0_LCSSA:%.*]] = phi i32 [ [[SPLIT]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]