Index: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -188,8 +188,9 @@ PHINode *CntPhi, Value *Var); bool recognizeAndInsertCTLZ(); void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst, - PHINode *CntPhi, Value *Var, const DebugLoc &DL, - bool ZeroCheck, bool IsCntPhiUsedOutsideLoop); + PHINode *CntPhi, Value *Var, Instruction *DefX, + const DebugLoc &DL, bool ZeroCheck, + bool IsCntPhiUsedOutsideLoop); /// @} }; @@ -1316,8 +1317,8 @@ return false; // step 2: detect instructions corresponding to "x.next = x >> 1" - // TODO: Support loops that use LShr. - if (!DefX || DefX->getOpcode() != Instruction::AShr) + if (!DefX || (DefX->getOpcode() != Instruction::AShr && + DefX->getOpcode() != Instruction::LShr)) return false; ConstantInt *Shft = dyn_cast(DefX->getOperand(1)); if (!Shft || !Shft->isOne()) @@ -1401,8 +1402,7 @@ // Make sure the initial value can't be negative otherwise the ashr in the // loop might never reach zero which would make the loop infinite. - // TODO: Support loops that use lshr and wouldn't need this check. - if (!isKnownNonNegative(InitX, *DL)) + if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, *DL)) return false; // If we check X != 0 before entering the loop we don't need a zero @@ -1433,8 +1433,9 @@ TargetTransformInfo::TCC_Basic) return false; - transformLoopToCountable(PH, CntInst, CntPhi, InitX, DefX->getDebugLoc(), - ZeroCheck, IsCntPhiUsedOutsideLoop); + transformLoopToCountable(PH, CntInst, CntPhi, InitX, DefX, + DefX->getDebugLoc(), ZeroCheck, + IsCntPhiUsedOutsideLoop); return true; } @@ -1547,7 +1548,8 @@ /// If CntInst and DefX are not used in LOOP_BODY they will be removed. void LoopIdiomRecognize::transformLoopToCountable( BasicBlock *Preheader, Instruction *CntInst, PHINode *CntPhi, Value *InitX, - const DebugLoc &DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) { + Instruction *DefX, const DebugLoc &DL, bool ZeroCheck, + bool IsCntPhiUsedOutsideLoop) { BranchInst *PreheaderBr = cast(Preheader->getTerminator()); // Step 1: Insert the CTLZ instruction at the end of the preheader block @@ -1558,10 +1560,16 @@ Builder.SetCurrentDebugLocation(DL); Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext; - if (IsCntPhiUsedOutsideLoop) - InitXNext = Builder.CreateAShr(InitX, - ConstantInt::get(InitX->getType(), 1)); - else + if (IsCntPhiUsedOutsideLoop) { + if (DefX->getOpcode() == Instruction::AShr) + InitXNext = + Builder.CreateAShr(InitX, ConstantInt::get(InitX->getType(), 1)); + else if (DefX->getOpcode() == Instruction::LShr) + InitXNext = + Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1)); + else + llvm_unreachable("Unexpected opcode!"); + } else InitXNext = InitX; CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck); Count = Builder.CreateSub( Index: llvm/trunk/test/Transforms/LoopIdiom/X86/ctlz.ll =================================================================== --- llvm/trunk/test/Transforms/LoopIdiom/X86/ctlz.ll +++ llvm/trunk/test/Transforms/LoopIdiom/X86/ctlz.ll @@ -119,6 +119,52 @@ ; Here it will replace the loop - ; assume builtin is always profitable. ; +; int ctlz_zero_check_lshr(int n) +; { +; int i = 0; +; while(n) { +; n >>= 1; +; i++; +; } +; return i; +; } +; +; ALL: entry +; ALL: %0 = call i32 @llvm.ctlz.i32(i32 %n, i1 true) +; ALL-NEXT: %1 = sub i32 32, %0 +; ALL: %inc.lcssa = phi i32 [ %1, %while.body ] +; ALL: %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ] +; ALL: ret i32 %i.0.lcssa + +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @ctlz_zero_check_lshr(i32 %n) { +entry: + %tobool4 = icmp eq i32 %n, 0 + br i1 %tobool4, label %while.end, label %while.body.preheader + +while.body.preheader: ; preds = %entry + br label %while.body + +while.body: ; preds = %while.body.preheader, %while.body + %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ] + %n.addr.05 = phi i32 [ %shr, %while.body ], [ %n, %while.body.preheader ] + %shr = lshr i32 %n.addr.05, 1 + %inc = add nsw i32 %i.06, 1 + %tobool = icmp eq i32 %shr, 0 + br i1 %tobool, label %while.end.loopexit, label %while.body + +while.end.loopexit: ; preds = %while.body + br label %while.end + +while.end: ; preds = %while.end.loopexit, %entry + %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ] + ret i32 %i.0.lcssa +} + +; Recognize CTLZ builtin pattern. +; Here it will replace the loop - +; assume builtin is always profitable. +; ; int ctlz(int n) ; { ; n = n >= 0 ? n : -n; @@ -161,6 +207,44 @@ ; Here it will replace the loop - ; assume builtin is always profitable. ; +; int ctlz_lshr(int n) +; { +; int i = 0; +; while(n >>= 1) { +; i++; +; } +; return i; +; } +; +; ALL: entry +; ALL: %0 = lshr i32 %n, 1 +; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) +; ALL-NEXT: %2 = sub i32 32, %1 +; ALL-NEXT: %3 = add i32 %2, 1 +; ALL: %i.0.lcssa = phi i32 [ %2, %while.cond ] +; ALL: ret i32 %i.0.lcssa + +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @ctlz_lshr(i32 %n) { +entry: + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ] + %shr = lshr i32 %n.addr.0, 1 + %tobool = icmp eq i32 %shr, 0 + %inc = add nsw i32 %i.0, 1 + br i1 %tobool, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + ret i32 %i.0 +} + +; Recognize CTLZ builtin pattern. +; Here it will replace the loop - +; assume builtin is always profitable. +; ; int ctlz_add(int n, int i0) ; { ; n = n >= 0 ? n : -n; @@ -204,6 +288,45 @@ ; Here it will replace the loop - ; assume builtin is always profitable. ; +; int ctlz_add_lshr(int n, int i0) +; { +; int i = i0; +; while(n >>= 1) { +; i++; +; } +; return i; +; } +; +; ALL: entry +; ALL: %0 = lshr i32 %n, 1 +; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) +; ALL-NEXT: %2 = sub i32 32, %1 +; ALL-NEXT: %3 = add i32 %2, 1 +; ALL-NEXT: %4 = add i32 %2, %i0 +; ALL: %i.0.lcssa = phi i32 [ %4, %while.cond ] +; ALL: ret i32 %i.0.lcssa +; +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @ctlz_add_lshr(i32 %n, i32 %i0) { +entry: + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ] + %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ] + %shr = lshr i32 %n.addr.0, 1 + %tobool = icmp eq i32 %shr, 0 + %inc = add nsw i32 %i.0, 1 + br i1 %tobool, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + ret i32 %i.0 +} + +; Recognize CTLZ builtin pattern. +; Here it will replace the loop - +; assume builtin is always profitable. +; ; int ctlz_sext(short in) ; { ; int n = in; @@ -245,6 +368,45 @@ ret i32 %i.0 } +; Recognize CTLZ builtin pattern. +; Here it will replace the loop - +; assume builtin is always profitable. +; +; int ctlz_sext_lshr(short in) +; { +; int i = 0; +; while(in >>= 1) { +; i++; +; } +; return i; +; } +; +; ALL: entry +; ALL: %0 = lshr i32 %n, 1 +; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) +; ALL-NEXT: %2 = sub i32 32, %1 +; ALL-NEXT: %3 = add i32 %2, 1 +; ALL: %i.0.lcssa = phi i32 [ %2, %while.cond ] +; ALL: ret i32 %i.0.lcssa + +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @ctlz_sext_lshr(i16 %in) { +entry: + %n = sext i16 %in to i32 + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ] + %shr = lshr i32 %n.addr.0, 1 + %tobool = icmp eq i32 %shr, 0 + %inc = add nsw i32 %i.0, 1 + br i1 %tobool, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + ret i32 %i.0 +} + ; This loop contains a volatile store. If x is initially negative, ; the code will be an infinite loop because the ashr will eventually produce ; all ones and continue doing so. This prevents the loop from terminating. If