Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2610,8 +2610,9 @@ return I; } - if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, *C)) - return I; + if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0))) + if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C)) + return I; return nullptr; } @@ -2755,14 +2756,10 @@ return nullptr; } -/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C. -Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, - const APInt &C) { - IntrinsicInst *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0)); - if (!II || !Cmp.isEquality()) - return nullptr; - - // Handle icmp {eq|ne} <intrinsic>, Constant. +/// Fold an equality icmp with LLVM intrinsic and constant operand. +Instruction *InstCombiner::foldICmpEqIntrinsicWithConstant(ICmpInst &Cmp, + IntrinsicInst *II, + const APInt &C) { Type *Ty = II->getType(); unsigned BitWidth = C.getBitWidth(); switch (II->getIntrinsicID()) { @@ -2822,6 +2819,65 @@ return nullptr; } +/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C. 
+Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, + IntrinsicInst *II, + const APInt &C) { + if (Cmp.isEquality()) + return foldICmpEqIntrinsicWithConstant(Cmp, II, C); + + Type *Ty = II->getType(); + unsigned BitWidth = C.getBitWidth(); + switch (II->getIntrinsicID()) { + case Intrinsic::ctlz: { + // ctlz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX < 0b00010000 + if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) { + unsigned Num = C.getLimitedValue(); + APInt Limit = APInt::getOneBitSet(BitWidth, BitWidth - Num - 1); + return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT, + II->getArgOperand(0), ConstantInt::get(Ty, Limit)); + } + + // ctlz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX > 0b00011111 + if (Cmp.getPredicate() == ICmpInst::ICMP_ULT && + C.uge(1) && C.ule(BitWidth)) { + unsigned Num = C.getLimitedValue(); + APInt Limit = APInt::getLowBitsSet(BitWidth, BitWidth - Num); + return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_UGT, + II->getArgOperand(0), ConstantInt::get(Ty, Limit)); + } + break; + } + case Intrinsic::cttz: { + // Limit to one use to ensure we don't increase instruction count. 
+ if (!II->hasOneUse()) + return nullptr; + + // cttz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX & 0b00001111 == 0 + if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) { + APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue() + 1); + return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, + Builder.CreateAnd(II->getArgOperand(0), Mask), + ConstantInt::getNullValue(Ty)); + } + + // cttz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX & 0b00000111 != 0 + if (Cmp.getPredicate() == ICmpInst::ICMP_ULT && + C.uge(1) && C.ule(BitWidth)) { + APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue()); + return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE, + Builder.CreateAnd(II->getArgOperand(0), Mask), + ConstantInt::getNullValue(Ty)); + } + break; + } + default: + break; + } + + return nullptr; +} + /// Handle icmp with constant (but not simple integer constant) RHS. Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h @@ -903,7 +903,10 @@ Instruction *foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, BinaryOperator *BO, const APInt &C); - Instruction *foldICmpIntrinsicWithConstant(ICmpInst &ICI, const APInt &C); + Instruction *foldICmpIntrinsicWithConstant(ICmpInst &ICI, IntrinsicInst *II, + const APInt &C); + Instruction *foldICmpEqIntrinsicWithConstant(ICmpInst &ICI, IntrinsicInst *II, + const APInt &C); // Helpers of visitSelectInst(). 
Instruction *foldSelectExtConst(SelectInst &Sel); Index: llvm/trunk/test/Transforms/InstCombine/cmp-intrinsic.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/cmp-intrinsic.ll +++ llvm/trunk/test/Transforms/InstCombine/cmp-intrinsic.ll @@ -149,8 +149,7 @@ define i1 @ctlz_ugt_one_i32(i32 %x) { ; CHECK-LABEL: @ctlz_ugt_one_i32( -; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0 -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[LZ]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 1073741824 ; CHECK-NEXT: ret i1 [[CMP]] ; %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) @@ -160,8 +159,7 @@ define i1 @ctlz_ugt_other_i32(i32 %x) { ; CHECK-LABEL: @ctlz_ugt_other_i32( -; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0 -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[LZ]], 16 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 32768 ; CHECK-NEXT: ret i1 [[CMP]] ; %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) @@ -173,7 +171,7 @@ ; CHECK-LABEL: @ctlz_ugt_other_multiuse_i32( ; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0 ; CHECK-NEXT: store i32 [[LZ]], i32* [[P:%.*]], align 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[LZ]], 16 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X]], 32768 ; CHECK-NEXT: ret i1 [[CMP]] ; %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) @@ -184,8 +182,7 @@ define i1 @ctlz_ugt_bw_minus_one_i32(i32 %x) { ; CHECK-LABEL: @ctlz_ugt_bw_minus_one_i32( -; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0 -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[LZ]], 31 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) @@ -205,8 +202,7 @@ define <2 x i1> @ctlz_ult_other_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @ctlz_ult_other_v2i32( -; CHECK-NEXT: 
[[LZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[LZ]], <i32 16, i32 16> +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 65535, i32 65535> ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false) @@ -218,7 +214,7 @@ ; CHECK-LABEL: @ctlz_ult_other_multiuse_v2i32( ; CHECK-NEXT: [[LZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false) ; CHECK-NEXT: store <2 x i32> [[LZ]], <2 x i32>* [[P:%.*]], align 8 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[LZ]], <i32 16, i32 16> +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X]], <i32 65535, i32 65535> ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false) @@ -229,8 +225,7 @@ define <2 x i1> @ctlz_ult_bw_minus_one_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @ctlz_ult_bw_minus_one_v2i32( -; CHECK-NEXT: [[LZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[LZ]], <i32 31, i32 31> +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 1, i32 1> ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false) @@ -240,8 +235,7 @@ define <2 x i1> @ctlz_ult_bitwidth_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @ctlz_ult_bitwidth_v2i32( -; CHECK-NEXT: [[LZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[LZ]], <i32 32, i32 32> +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false) @@ -359,8 +353,8 @@ define i1 @cttz_ugt_one_i33(i33 %x) { ; CHECK-LABEL: @cttz_ugt_one_i33( -; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i33 [[TZ]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = and i33 [[X:%.*]], 3 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TMP1]], 0 ; CHECK-NEXT: ret i1 
[[CMP]] ; %tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false) @@ -370,8 +364,8 @@ define i1 @cttz_ugt_other_i33(i33 %x) { ; CHECK-LABEL: @cttz_ugt_other_i33( -; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i33 [[TZ]], 16 +; CHECK-NEXT: [[TMP1:%.*]] = and i33 [[X:%.*]], 131071 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false) @@ -394,8 +388,7 @@ define i1 @cttz_ugt_bw_minus_one_i33(i33 %x) { ; CHECK-LABEL: @cttz_ugt_bw_minus_one_i33( -; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i33 [[TZ]], 32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false) @@ -415,8 +408,8 @@ define <2 x i1> @cttz_ult_other_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @cttz_ult_other_v2i32( -; CHECK-NEXT: [[TZ:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TZ]], <i32 16, i32 16> +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 65535, i32 65535> +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false) @@ -439,8 +432,8 @@ define <2 x i1> @cttz_ult_bw_minus_one_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @cttz_ult_bw_minus_one_v2i32( -; CHECK-NEXT: [[TZ:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TZ]], <i32 31, i32 31> +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 2147483647, i32 2147483647> +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false) @@ -450,8 +443,7 @@ define <2 x i1> @cttz_ult_bitwidth_v2i32(<2 x i32> %x) { ; CHECK-LABEL: 
@cttz_ult_bitwidth_v2i32( -; CHECK-NEXT: [[TZ:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TZ]], <i32 32, i32 32> +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false)