Index: lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2760,14 +2760,17 @@
 Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
                                                          const APInt &C) {
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0));
-  if (!II || !Cmp.isEquality())
+  if (!II)
     return nullptr;
 
-  // Handle icmp {eq|ne} <intrinsic>, Constant.
+  // Handle icmp pred <intrinsic>, Constant.
   Type *Ty = II->getType();
   unsigned BitWidth = C.getBitWidth();
   switch (II->getIntrinsicID()) {
   case Intrinsic::bswap:
+    if (!Cmp.isEquality())
+      return nullptr;
+
     Worklist.Add(II);
     Cmp.setOperand(0, II->getArgOperand(0));
     Cmp.setOperand(1, ConstantInt::get(Ty, C.byteSwap()));
@@ -2776,18 +2779,21 @@
   case Intrinsic::ctlz:
   case Intrinsic::cttz: {
     // ctz(A) == bitwidth(A) -> A == 0 and likewise for !=
-    if (C == BitWidth) {
+    if (Cmp.isEquality() && C == BitWidth) {
       Worklist.Add(II);
       Cmp.setOperand(0, II->getArgOperand(0));
       Cmp.setOperand(1, ConstantInt::getNullValue(Ty));
       return &Cmp;
     }
 
-    // ctz(A) == C -> A & Mask1 == Mask2, where Mask2 only has bit C set
-    // and Mask1 has bits 0..C+1 set. Similar for ctl, but for high bits.
     // Limit to one use to ensure we don't increase instruction count.
-    unsigned Num = C.getLimitedValue(BitWidth);
-    if (Num != BitWidth && II->hasOneUse()) {
+    if (!II->hasOneUse())
+      return nullptr;
+
+    unsigned Num = C.getLimitedValue();
+    if (Cmp.isEquality() && Num < BitWidth) {
+      // ctz(A) == C -> A & Mask1 == Mask2, where Mask2 only has bit C set
+      // and Mask1 has bits 0..C+1 set. Similar for ctl, but for high bits.
       bool IsTrailing = II->getIntrinsicID() == Intrinsic::cttz;
       APInt Mask1 = IsTrailing ? APInt::getLowBitsSet(BitWidth, Num + 1)
                                : APInt::getHighBitsSet(BitWidth, Num + 1);
@@ -2799,10 +2805,36 @@
       Worklist.Add(II);
       return &Cmp;
     }
+
+    if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && Num < BitWidth) {
+      bool IsTrailing = II->getIntrinsicID() == Intrinsic::cttz;
+      APInt Mask = IsTrailing ? APInt::getLowBitsSet(BitWidth, Num + 1)
+                              : APInt::getHighBitsSet(BitWidth, Num + 1);
+      Cmp.setPredicate(ICmpInst::ICMP_EQ);
+      Cmp.setOperand(0, Builder.CreateAnd(II->getArgOperand(0), Mask));
+      Cmp.setOperand(1, ConstantInt::getNullValue(Ty));
+      Worklist.Add(II);
+      return &Cmp;
+    }
+
+    if (Cmp.getPredicate() == ICmpInst::ICMP_ULT &&
+        Num >= 1 && Num <= BitWidth) {
+      bool IsTrailing = II->getIntrinsicID() == Intrinsic::cttz;
+      APInt Mask = IsTrailing ? APInt::getLowBitsSet(BitWidth, Num)
+                              : APInt::getHighBitsSet(BitWidth, Num);
+      Cmp.setPredicate(ICmpInst::ICMP_NE);
+      Cmp.setOperand(0, Builder.CreateAnd(II->getArgOperand(0), Mask));
+      Cmp.setOperand(1, ConstantInt::getNullValue(Ty));
+      Worklist.Add(II);
+      return &Cmp;
+    }
     break;
   }
   case Intrinsic::ctpop: {
+    if (!Cmp.isEquality())
+      return nullptr;
+
     // popcount(A) == 0 -> A == 0 and likewise for !=
     // popcount(A) == bitwidth(A) -> A == -1 and likewise for !=
     bool IsZero = C.isNullValue();
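Not part of the patch itself: the two new folds rest on the bit-level facts that cttz(A) u> C holds exactly when the low C+1 bits of A are all zero, and cttz(A) u< C holds exactly when at least one of the low C bits is set (ctlz is the mirror image on the high bits). Below is a standalone C++ sketch, not LLVM code, that brute-forces both equivalences over an 8-bit domain; it assumes a C++20 compiler, where std::countr_zero from <bit> returns the bit width for a zero input, matching cttz with a defined result at zero.

// Exhaustive check of the cttz equivalences on 8-bit values.
// Hypothetical harness for illustration only; not part of the patch.
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  constexpr int BitWidth = 8;
  for (int A = 0; A <= 0xFF; ++A) {
    uint8_t X = static_cast<uint8_t>(A);
    int Tz = std::countr_zero(X); // cttz; countr_zero(0) == 8

    // cttz(A) ugt C  <=>  (A & low_bits(C + 1)) == 0, for C < BitWidth.
    for (int C = 0; C < BitWidth; ++C) {
      uint8_t Mask = static_cast<uint8_t>((1u << (C + 1)) - 1);
      assert((Tz > C) == ((X & Mask) == 0));
    }

    // cttz(A) ult C  <=>  (A & low_bits(C)) != 0, for 1 <= C <= BitWidth.
    for (int C = 1; C <= BitWidth; ++C) {
      uint8_t Mask = static_cast<uint8_t>((1u << C) - 1);
      assert((Tz < C) == ((X & Mask) != 0));
    }
  }
  return 0;
}

The masks mirror the APInt::getLowBitsSet(BitWidth, Num + 1) and APInt::getLowBitsSet(BitWidth, Num) calls in the two new blocks above; the ctlz path uses getHighBitsSet in the same positions.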
Index: test/Transforms/InstCombine/cmp-intrinsic.ll
===================================================================
--- test/Transforms/InstCombine/cmp-intrinsic.ll
+++ test/Transforms/InstCombine/cmp-intrinsic.ll
@@ -149,8 +149,7 @@
 define i1 @ctlz_ugt_one_i32(i32 %x) {
 ; CHECK-LABEL: @ctlz_ugt_one_i32(
-; CHECK-NEXT:    [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[LZ]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 1073741824
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
@@ -160,8 +159,7 @@
 define i1 @ctlz_ugt_other_i32(i32 %x) {
 ; CHECK-LABEL: @ctlz_ugt_other_i32(
-; CHECK-NEXT:    [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[LZ]], 16
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 32768
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
@@ -171,8 +169,7 @@
 define i1 @ctlz_ugt_bw_minus_one_i32(i32 %x) {
 ; CHECK-LABEL: @ctlz_ugt_bw_minus_one_i32(
-; CHECK-NEXT:    [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[LZ]], 31
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
@@ -192,8 +189,7 @@
 define i1 @ctlz_ult_other_i32(i32 %x) {
 ; CHECK-LABEL: @ctlz_ult_other_i32(
-; CHECK-NEXT:    [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[LZ]], 16
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], 65535
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
@@ -203,8 +199,7 @@
 define i1 @ctlz_ult_bw_minus_one_i32(i32 %x) {
 ; CHECK-LABEL: @ctlz_ult_bw_minus_one_i32(
-; CHECK-NEXT:    [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[LZ]], 31
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], 1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
@@ -214,8 +209,7 @@
 define i1 @ctlz_ult_bitwidth_i32(i32 %x) {
 ; CHECK-LABEL: @ctlz_ult_bitwidth_i32(
-; CHECK-NEXT:    [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[LZ]], 32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[X:%.*]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
@@ -333,8 +327,8 @@
 define i1 @cttz_ugt_one_i33(i33 %x) {
 ; CHECK-LABEL: @cttz_ugt_one_i33(
-; CHECK-NEXT:    [[LZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i33 [[LZ]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = and i33 [[X:%.*]], 3
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i33 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %lz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
@@ -344,8 +338,8 @@
 define i1 @cttz_ugt_other_i33(i33 %x) {
 ; CHECK-LABEL: @cttz_ugt_other_i33(
-; CHECK-NEXT:    [[LZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i33 [[LZ]], 16
+; CHECK-NEXT:    [[TMP1:%.*]] = and i33 [[X:%.*]], 131071
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i33 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %lz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
@@ -355,8 +349,7 @@
 define i1 @cttz_ugt_bw_minus_one_i33(i33 %x) {
 ; CHECK-LABEL: @cttz_ugt_bw_minus_one_i33(
-; CHECK-NEXT:    [[LZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i33 [[LZ]], 32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i33 [[X:%.*]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %lz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
@@ -377,8 +370,8 @@
 define i1 @cttz_ult_other_i33(i33 %x) {
 ; CHECK-LABEL: @cttz_ult_other_i33(
-; CHECK-NEXT:    [[LZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i33 [[LZ]], 16
+; CHECK-NEXT:    [[TMP1:%.*]] = and i33 [[X:%.*]], 65535
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i33 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %lz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
@@ -388,8 +381,8 @@
 define i1 @cttz_ult_bw_minus_one_i33(i33 %x) {
 ; CHECK-LABEL: @cttz_ult_bw_minus_one_i33(
-; CHECK-NEXT:    [[LZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i33 [[LZ]], 32
+; CHECK-NEXT:    [[TMP1:%.*]] = and i33 [[X:%.*]], 4294967295
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i33 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %lz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
@@ -399,8 +392,7 @@
 define i1 @cttz_ult_bitwidth_i33(i33 %x) {
 ; CHECK-LABEL: @cttz_ult_bitwidth_i33(
-; CHECK-NEXT:    [[LZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i33 [[LZ]], 33
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i33 [[X:%.*]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %lz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
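The i32 ctlz tests above also show that the and+icmp the UGT/ULT folds emit is simplified further: for a mask of consecutive high bits, the masked comparison against zero becomes an unsigned range compare, which is why the CHECK lines expect plain icmp ult/ugt against a power-of-two boundary (e.g. ctlz(x) u> 1 becomes x u< 1073741824). A standalone C++20 sketch, again a hypothetical harness rather than LLVM code, exhaustively confirms those end-to-end equivalences on a 16-bit mirror of the tests using std::countl_zero:

// ctlz(X) ugt C  <=>  X ult 2^(BitWidth - C - 1), for C < BitWidth.
// ctlz(X) ult C  <=>  X ugt 2^(BitWidth - C) - 1, for 1 <= C <= BitWidth.
// Hypothetical harness for illustration only; not part of the patch.
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  constexpr int BitWidth = 16;
  for (uint32_t V = 0; V <= 0xFFFF; ++V) {
    uint16_t X = static_cast<uint16_t>(V);
    int Lz = std::countl_zero(X); // ctlz; countl_zero(0) == 16

    for (int C = 0; C < BitWidth; ++C) {
      uint32_t Bound = 1u << (BitWidth - C - 1);
      assert((Lz > C) == (V < Bound)); // ugt fold
    }

    for (int C = 1; C <= BitWidth; ++C) {
      uint32_t Bound = (1u << (BitWidth - C)) - 1;
      assert((Lz < C) == (V > Bound)); // ult fold
    }
  }
  return 0;
}

At C equal to BitWidth - 1 and BitWidth the bounds degenerate to 1 and 0, matching the @ctlz_ugt_bw_minus_one_i32 and @ctlz_ult_bitwidth_i32 cases, where the compare collapses to an equality or inequality with zero.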