Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1408,6 +1408,49 @@
     }
   }
 
+  // Add range metadata since known bits can't completely reflect what we know.
+  if (!II.getMetadata(LLVMContext::MD_range)) {
+    Metadata *LowAndHigh[] = {
+        ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
+        ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
+    II.setMetadata(LLVMContext::MD_range,
+                   MDNode::get(II.getContext(), LowAndHigh));
+    return &II;
+  }
+
+  return nullptr;
+}
+
+/// Attach !range metadata to a scalar llvm.ctpop call using the known bits of
+/// its operand. Returns &II if metadata was added, nullptr otherwise.
+static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
+  assert(II.getIntrinsicID() == Intrinsic::ctpop &&
+         "Expected ctpop intrinsic");
+  Value *Op0 = II.getArgOperand(0);
+  // FIXME: Try to simplify vectors of integers.
+  auto *IT = dyn_cast<IntegerType>(Op0->getType());
+  if (!IT)
+    return nullptr;
+
+  unsigned BitWidth = IT->getBitWidth();
+  KnownBits Known(BitWidth);
+  IC.computeKnownBits(Op0, Known, 0, &II);
+
+  unsigned MinCount = Known.countMinPopulation();
+  unsigned MaxCount = Known.countMaxPopulation();
+
+  // Add range metadata since known bits can't completely reflect what we know.
+  // Skip i1: MaxCount + 1 can be 2, which truncates to 0 in i1 and would
+  // produce the invalid empty range [0, 0).
+  if (BitWidth != 1 && !II.getMetadata(LLVMContext::MD_range)) {
+    Metadata *LowAndHigh[] = {
+        ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
+        ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
+    II.setMetadata(LLVMContext::MD_range,
+                   MDNode::get(II.getContext(), LowAndHigh));
+    return &II;
+  }
+
   return nullptr;
 }
 
@@ -1980,6 +2023,11 @@
       return I;
     break;
 
+  case Intrinsic::ctpop:
+    if (auto *I = foldCtpop(*II, *this))
+      return I;
+    break;
+
   case Intrinsic::uadd_with_overflow:
   case Intrinsic::sadd_with_overflow:
   case Intrinsic::umul_with_overflow:
Index: test/Transforms/InstCombine/ctpop.ll
===================================================================
--- test/Transforms/InstCombine/ctpop.ll
+++ test/Transforms/InstCombine/ctpop.ll
@@ -44,7 +44,7 @@
 ; Negative test for when we know nothing
 define i1 @test4(i8 %arg) {
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[ARG:%.*]])
+; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[ARG:%.*]]), !range ![[RANGE:[0-9]+]]
 ; CHECK-NEXT:    [[RES:%.*]] = icmp eq i8 [[CNT]], 2
 ; CHECK-NEXT:    ret i1 [[RES]]
 ;
@@ -52,3 +52,17 @@
   %res = icmp eq i8 %cnt, 2
   ret i1 %res
 }
+
+; Test when the number of possible known bits isn't one less than a power of 2
+; and the compare value is greater but less than the next power of 2.
+define i1 @test5(i32 %arg) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    ret i1 false
+;
+  %and = and i32 %arg, 3
+  %cnt = call i32 @llvm.ctpop.i32(i32 %and)
+  %res = icmp eq i32 %cnt, 3
+  ret i1 %res
+}
+
+; CHECK: ![[RANGE]] = !{i8 0, i8 9}
Index: test/Transforms/InstCombine/intrinsics.ll
===================================================================
--- test/Transforms/InstCombine/intrinsics.ll
+++ test/Transforms/InstCombine/intrinsics.ll
@@ -295,7 +295,7 @@
 define i1 @cttz_knownbits2(i32 %arg) {
 ; CHECK-LABEL: @cttz_knownbits2(
 ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[ARG:%.*]], 4
-; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true)
+; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) #2, !range ![[CTTZ_RANGE:[0-9]+]]
 ; CHECK-NEXT:    [[RES:%.*]] = icmp eq i32 [[CNT]], 2
 ; CHECK-NEXT:    ret i1 [[RES]]
 ;
@@ -305,6 +305,16 @@
   ret i1 %res
 }
 
+define i1 @cttz_knownbits3(i32 %arg) {
+; CHECK-LABEL: @cttz_knownbits3(
+; CHECK-NEXT:    ret i1 false
+;
+  %or = or i32 %arg, 4
+  %cnt = call i32 @llvm.cttz.i32(i32 %or, i1 true) nounwind readnone
+  %res = icmp eq i32 %cnt, 3
+  ret i1 %res
+}
+
 define i8 @ctlz(i8 %a) {
 ; CHECK-LABEL: @ctlz(
 ; CHECK-NEXT:    ret i8 2
@@ -328,7 +338,7 @@
 define i1 @ctlz_knownbits2(i8 %arg) {
 ; CHECK-LABEL: @ctlz_knownbits2(
 ; CHECK-NEXT:    [[OR:%.*]] = or i8 [[ARG:%.*]], 32
-; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true)
+; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) #2, !range ![[CTLZ_RANGE:[0-9]+]]
 ; CHECK-NEXT:    [[RES:%.*]] = icmp eq i8 [[CNT]], 2
 ; CHECK-NEXT:    ret i1 [[RES]]
 ;
@@ -338,6 +348,16 @@
   ret i1 %res
 }
 
+define i1 @ctlz_knownbits3(i8 %arg) {
+; CHECK-LABEL: @ctlz_knownbits3(
+; CHECK-NEXT:    ret i1 false
+;
+  %or = or i8 %arg, 32
+  %cnt = call i8 @llvm.ctlz.i8(i8 %or, i1 true) nounwind readnone
+  %res = icmp eq i8 %cnt, 3
+  ret i1 %res
+}
+
 define void @cmp.simplify(i32 %a, i32 %b, i1* %c) {
   %lz = tail call i32 @llvm.ctlz.i32(i32 %a, i1 false) nounwind readnone
   %lz.cmp = icmp eq i32 %lz, 32
@@ -610,3 +630,6 @@
   store volatile double %C, double* %P
   ret void
 }
+
+; CHECK: [[CTTZ_RANGE]] = !{i32 0, i32 3}
+; CHECK: [[CTLZ_RANGE]] = !{i8 0, i8 3}