Index: lib/Transforms/Scalar/LoopIdiomRecognize.cpp =================================================================== --- lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -188,8 +188,9 @@ PHINode *CntPhi, Value *Var); bool recognizeAndInsertCTLZ(); void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst, - PHINode *CntPhi, Value *Var, const DebugLoc DL, - bool ZeroCheck, bool IsCntPhiUsedOutsideLoop); + PHINode *CntPhi, Value *Var, Instruction *DefX, + const DebugLoc DL, bool ZeroCheck, + bool IsCntPhiUsedOutsideLoop); /// @} }; @@ -1316,8 +1317,8 @@ return false; // step 2: detect instructions corresponding to "x.next = x >> 1" - // TODO: Support loops that use LShr. - if (!DefX || DefX->getOpcode() != Instruction::AShr) + // DONE: Support loops that use LShr. + if (!DefX || (DefX->getOpcode() != Instruction::AShr && DefX->getOpcode() != Instruction::LShr)) return false; ConstantInt *Shft = dyn_cast(DefX->getOperand(1)); if (!Shft || !Shft->isOne()) @@ -1401,8 +1402,8 @@ // Make sure the initial value can't be negative otherwise the ashr in the // loop might never reach zero which would make the loop infinite. - // TODO: Support loops that use lshr and wouldn't need this check. - if (!isKnownNonNegative(InitX, *DL)) + // DONE: Support loops that use lshr and wouldn't need this check. + if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, *DL)) return false; // If we check X != 0 before entering the loop we don't need a zero @@ -1434,7 +1435,7 @@ return false; const DebugLoc DL = DefX->getDebugLoc(); - transformLoopToCountable(PH, CntInst, CntPhi, InitX, DL, ZeroCheck, + transformLoopToCountable(PH, CntInst, CntPhi, InitX, DefX, DL, ZeroCheck, IsCntPhiUsedOutsideLoop); return true; } @@ -1548,7 +1549,7 @@ /// If CntInst and DefX are not used in LOOP_BODY they will be removed. void LoopIdiomRecognize::transformLoopToCountable( BasicBlock *Preheader, Instruction *CntInst, PHINode *CntPhi, Value *InitX, - const DebugLoc DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) { + Instruction *DefX, const DebugLoc DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) { BranchInst *PreheaderBr = cast(Preheader->getTerminator()); // Step 1: Insert the CTLZ instruction at the end of the preheader block @@ -1559,10 +1560,14 @@ Builder.SetCurrentDebugLocation(DL); Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext; - if (IsCntPhiUsedOutsideLoop) - InitXNext = Builder.CreateAShr(InitX, + if (IsCntPhiUsedOutsideLoop){ + if(DefX->getOpcode() == Instruction::AShr) + InitXNext = Builder.CreateAShr(InitX, ConstantInt::get(InitX->getType(), 1)); - else + else if(DefX->getOpcode() == Instruction::LShr) + InitXNext = Builder.CreateLShr(InitX, + ConstantInt::get(InitX->getType(), 1)); + }else InitXNext = InitX; CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck); Count = Builder.CreateSub( Index: test/Transforms/LoopIdiom/X86/ctlz.ll =================================================================== --- test/Transforms/LoopIdiom/X86/ctlz.ll +++ test/Transforms/LoopIdiom/X86/ctlz.ll @@ -115,6 +115,34 @@ ret i32 %i.0.lcssa } +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @ctlz_zero_check_lshr(i32 %n) { +entry: + %c = icmp sgt i32 %n, 0 + %negn = sub nsw i32 0, %n + %abs_n = select i1 %c, i32 %n, i32 %negn + %tobool4 = icmp eq i32 %abs_n, 0 + br i1 %tobool4, label %while.end, label %while.body.preheader + +while.body.preheader: ; preds = %entry + br label %while.body + +while.body: ; preds = %while.body.preheader, %while.body + %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ] + %n.addr.05 = phi i32 [ %shr, %while.body ], [ %abs_n, %while.body.preheader ] + %shr = lshr i32 %n.addr.05, 1 + %inc = add nsw i32 %i.06, 1 + %tobool = icmp eq i32 %shr, 0 + br i1 %tobool, label %while.end.loopexit, label %while.body + +while.end.loopexit: ; preds = %while.body + br label %while.end + +while.end: ; preds = %while.end.loopexit, %entry + %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ] + ret i32 %i.0.lcssa +} + ; Recognize CTLZ builtin pattern. ; Here it will replace the loop - ; assume builtin is always profitable. @@ -157,6 +185,48 @@ ret i32 %i.0 } +; Recognize CTLZ builtin pattern. +; Here it will replace the loop - +; assume builtin is always profitable. +; +; int ctlz_lshr(int n) +; { +; n = n >= 0 ? n : -n; +; int i = 0; +; while(n >>= 1) { +; i++; +; } +; return i; +; } +; +; ALL: entry +; ALL: %0 = lshr i32 %abs_n, 1 +; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) +; ALL-NEXT: %2 = sub i32 32, %1 +; ALL-NEXT: %3 = add i32 %2, 1 +; ALL: %i.0.lcssa = phi i32 [ %2, %while.cond ] +; ALL: ret i32 %i.0.lcssa + +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @ctlz_lshr(i32 %n) { +entry: + %c = icmp sgt i32 %n, 0 + %negn = sub nsw i32 0, %n + %abs_n = select i1 %c, i32 %n, i32 %negn + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ] + %shr = lshr i32 %n.addr.0, 1 + %tobool = icmp eq i32 %shr, 0 + %inc = add nsw i32 %i.0, 1 + br i1 %tobool, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + ret i32 %i.0 +} + ; Recognize CTLZ builtin pattern. ; Here it will replace the loop - ; assume builtin is always profitable. @@ -200,6 +270,49 @@ ret i32 %i.0 } +; Recognize CTLZ builtin pattern. +; Here it will replace the loop - +; assume builtin is always profitable. +; +; int ctlz_add_lshr(int n, int i0) +; { +; n = n >= 0 ? n : -n; +; int i = i0; +; while(n >>= 1) { +; i++; +; } +; return i; +; } +; +; ALL: entry +; ALL: %0 = lshr i32 %abs_n, 1 +; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) +; ALL-NEXT: %2 = sub i32 32, %1 +; ALL-NEXT: %3 = add i32 %2, 1 +; ALL-NEXT: %4 = add i32 %2, %i0 +; ALL: %i.0.lcssa = phi i32 [ %4, %while.cond ] +; ALL: ret i32 %i.0.lcssa +; +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @ctlz_add_lshr(i32 %n, i32 %i0) { +entry: + %c = icmp sgt i32 %n, 0 + %negn = sub nsw i32 0, %n + %abs_n = select i1 %c, i32 %n, i32 %negn + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ] + %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ] + %shr = lshr i32 %n.addr.0, 1 + %tobool = icmp eq i32 %shr, 0 + %inc = add nsw i32 %i.0, 1 + br i1 %tobool, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + ret i32 %i.0 +} + ; Recognize CTLZ builtin pattern. ; Here it will replace the loop - ; assume builtin is always profitable. @@ -245,6 +358,51 @@ ret i32 %i.0 } +; Recognize CTLZ builtin pattern. +; Here it will replace the loop - +; assume builtin is always profitable. +; +; int ctlz_sext_lshr(short in) +; { +; int n = in; +; if (in < 0) +; n = -n; +; int i = 0; +; while(n >>= 1) { +; i++; +; } +; return i; +; } +; +; ALL: entry +; ALL: %0 = lshr i32 %abs_n, 1 +; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) +; ALL-NEXT: %2 = sub i32 32, %1 +; ALL-NEXT: %3 = add i32 %2, 1 +; ALL: %i.0.lcssa = phi i32 [ %2, %while.cond ] +; ALL: ret i32 %i.0.lcssa + +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @ctlz_sext_lshr(i16 %in) { +entry: + %n = sext i16 %in to i32 + %c = icmp sgt i16 %in, 0 + %negn = sub nsw i32 0, %n + %abs_n = select i1 %c, i32 %n, i32 %negn + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ] + %shr = lshr i32 %n.addr.0, 1 + %tobool = icmp eq i32 %shr, 0 + %inc = add nsw i32 %i.0, 1 + br i1 %tobool, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + ret i32 %i.0 +} + ; This loop contains a volatile store. If x is initially negative, ; the code will be an infinite loop because the ashr will eventually produce ; all ones and continue doing so. This prevents the loop from terminating. If