Index: lib/Transforms/Scalar/LoopIdiomRecognize.cpp =================================================================== --- lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1317,7 +1317,6 @@ return false; // step 2: detect instructions corresponding to "x.next = x >> 1" - // DONE: Support loops that use LShr. if (!DefX || (DefX->getOpcode() != Instruction::AShr && DefX->getOpcode() != Instruction::LShr)) return false; ConstantInt *Shft = dyn_cast(DefX->getOperand(1)); @@ -1402,7 +1401,6 @@ // Make sure the initial value can't be negative otherwise the ashr in the // loop might never reach zero which would make the loop infinite. - // DONE: Support loops that use lshr and wouldn't need this check. if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, *DL)) return false; @@ -1560,15 +1558,13 @@ Builder.SetCurrentDebugLocation(DL); Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext; - if (IsCntPhiUsedOutsideLoop){ - if(DefX->getOpcode() == Instruction::AShr) - InitXNext = Builder.CreateAShr(InitX, - ConstantInt::get(InitX->getType(), 1)); - else if(DefX->getOpcode() == Instruction::LShr) - InitXNext = Builder.CreateLShr(InitX, - ConstantInt::get(InitX->getType(), 1)); - }else - InitXNext = InitX; + if (IsCntPhiUsedOutsideLoop) { + if (DefX->getOpcode() == Instruction::AShr) + InitXNext = Builder.CreateAShr(InitX, ConstantInt::get(InitX->getType(), 1)); + else if (DefX->getOpcode() == Instruction::LShr) + InitXNext = Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1)); + } else + InitXNext = InitX; CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck); Count = Builder.CreateSub( ConstantInt::get(CTLZ->getType(), Index: test/Transforms/LoopIdiom/X86/ctlz.ll =================================================================== --- test/Transforms/LoopIdiom/X86/ctlz.ll +++ test/Transforms/LoopIdiom/X86/ctlz.ll @@ -115,13 +115,31 @@ ret i32 %i.0.lcssa } +; Recognize CTLZ builtin pattern. +; Here it will replace the loop - +; assume builtin is always profitable. +; +; int ctlz_zero_check_lshr(int n) +; { +; int i = 0; +; while(n) { +; n >>= 1; +; i++; +; } +; return i; +; } +; +; ALL: entry +; ALL: %0 = call i32 @llvm.ctlz.i32(i32 %n, i1 true) +; ALL-NEXT: %1 = sub i32 32, %0 +; ALL: %inc.lcssa = phi i32 [ %1, %while.body ] +; ALL: %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ] +; ALL: ret i32 %i.0.lcssa + ; Function Attrs: norecurse nounwind readnone uwtable define i32 @ctlz_zero_check_lshr(i32 %n) { entry: - %c = icmp sgt i32 %n, 0 - %negn = sub nsw i32 0, %n - %abs_n = select i1 %c, i32 %n, i32 %negn - %tobool4 = icmp eq i32 %abs_n, 0 + %tobool4 = icmp eq i32 %n, 0 br i1 %tobool4, label %while.end, label %while.body.preheader while.body.preheader: ; preds = %entry @@ -129,7 +147,7 @@ while.body: ; preds = %while.body.preheader, %while.body %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ] - %n.addr.05 = phi i32 [ %shr, %while.body ], [ %abs_n, %while.body.preheader ] + %n.addr.05 = phi i32 [ %shr, %while.body ], [ %n, %while.body.preheader ] %shr = lshr i32 %n.addr.05, 1 %inc = add nsw i32 %i.06, 1 %tobool = icmp eq i32 %shr, 0 @@ -191,7 +209,6 @@ ; ; int ctlz_lshr(int n) ; { -; n = n >= 0 ? n : -n; ; int i = 0; ; while(n >>= 1) { ; i++; @@ -200,7 +217,7 @@ ; } ; ; ALL: entry -; ALL: %0 = lshr i32 %abs_n, 1 +; ALL: %0 = lshr i32 %n, 1 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) ; ALL-NEXT: %2 = sub i32 32, %1 ; ALL-NEXT: %3 = add i32 %2, 1 @@ -210,13 +227,10 @@ ; Function Attrs: norecurse nounwind readnone uwtable define i32 @ctlz_lshr(i32 %n) { entry: - %c = icmp sgt i32 %n, 0 - %negn = sub nsw i32 0, %n - %abs_n = select i1 %c, i32 %n, i32 %negn br label %while.cond while.cond: ; preds = %while.cond, %entry - %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ] + %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ] %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ] %shr = lshr i32 %n.addr.0, 1 %tobool = icmp eq i32 %shr, 0 @@ -276,7 +290,6 @@ ; ; int ctlz_add_lshr(int n, int i0) ; { -; n = n >= 0 ? n : -n; ; int i = i0; ; while(n >>= 1) { ; i++; @@ -285,7 +298,7 @@ ; } ; ; ALL: entry -; ALL: %0 = lshr i32 %abs_n, 1 +; ALL: %0 = lshr i32 %n, 1 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) ; ALL-NEXT: %2 = sub i32 32, %1 ; ALL-NEXT: %3 = add i32 %2, 1 @@ -296,13 +309,10 @@ ; Function Attrs: norecurse nounwind readnone uwtable define i32 @ctlz_add_lshr(i32 %n, i32 %i0) { entry: - %c = icmp sgt i32 %n, 0 - %negn = sub nsw i32 0, %n - %abs_n = select i1 %c, i32 %n, i32 %negn br label %while.cond while.cond: ; preds = %while.cond, %entry - %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ] + %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ] %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ] %shr = lshr i32 %n.addr.0, 1 %tobool = icmp eq i32 %shr, 0 @@ -364,18 +374,15 @@ ; ; int ctlz_sext_lshr(short in) ; { -; int n = in; -; if (in < 0) -; n = -n; ; int i = 0; -; while(n >>= 1) { +; while(in >>= 1) { ; i++; ; } ; return i; ; } ; ; ALL: entry -; ALL: %0 = lshr i32 %abs_n, 1 +; ALL: %0 = lshr i32 %n, 1 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) ; ALL-NEXT: %2 = sub i32 32, %1 ; ALL-NEXT: %3 = add i32 %2, 1 @@ -386,13 +393,10 @@ define i32 @ctlz_sext_lshr(i16 %in) { entry: %n = sext i16 %in to i32 - %c = icmp sgt i16 %in, 0 - %negn = sub nsw i32 0, %n - %abs_n = select i1 %c, i32 %n, i32 %negn br label %while.cond while.cond: ; preds = %while.cond, %entry - %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ] + %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ] %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ] %shr = lshr i32 %n.addr.0, 1 %tobool = icmp eq i32 %shr, 0