Index: lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCalls.cpp +++ lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1404,6 +1404,47 @@ } } + // Add range metadata since known bits can't completely reflect what we know. + // TODO: Handle splat vectors. + auto *IT = dyn_cast(Op0->getType()); + if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) { + Metadata *LowAndHigh[] = { + ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)), + ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))}; + II.setMetadata(LLVMContext::MD_range, + MDNode::get(II.getContext(), LowAndHigh)); + return &II; + } + + return nullptr; +} + +static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) { + assert(II.getIntrinsicID() == Intrinsic::ctpop && + "Expected ctpop intrinsic"); + Value *Op0 = II.getArgOperand(0); + // FIXME: Try to simplify vectors of integers. + auto *IT = dyn_cast(Op0->getType()); + if (!IT) + return nullptr; + + unsigned BitWidth = IT->getBitWidth(); + KnownBits Known(BitWidth); + IC.computeKnownBits(Op0, Known, 0, &II); + + unsigned MinCount = Known.countMinPopulation(); + unsigned MaxCount = Known.countMaxPopulation(); + + // Add range metadata since known bits can't completely reflect what we know. + if (!II.getMetadata(LLVMContext::MD_range)) { + Metadata *LowAndHigh[] = { + ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)), + ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))}; + II.setMetadata(LLVMContext::MD_range, + MDNode::get(II.getContext(), LowAndHigh)); + return &II; + } + return nullptr; } @@ -1976,6 +2017,11 @@ return I; break; + case Intrinsic::ctpop: + if (auto *I = foldCtpop(*II, *this)) + return I; + break; + case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: case Intrinsic::umul_with_overflow: Index: test/Transforms/InstCombine/ctpop.ll =================================================================== --- test/Transforms/InstCombine/ctpop.ll +++ test/Transforms/InstCombine/ctpop.ll @@ -44,7 +44,7 @@ ; Negative test for when we know nothing define i1 @test4(i8 %arg) { ; CHECK-LABEL: @test4( -; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[ARG:%.*]]) +; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[ARG:%.*]]), !range ![[RANGE:[0-9]+]] ; CHECK-NEXT: [[RES:%.*]] = icmp eq i8 [[CNT]], 2 ; CHECK-NEXT: ret i1 [[RES]] ; @@ -55,16 +55,14 @@ ; Test when the number of possible known bits isn't one less than a power of 2 ; and the compare value is greater but less than the next power of 2. -; TODO: The icmp is unnecessary given the known bits of the input. define i1 @test5(i32 %arg) { ; CHECK-LABEL: @test5( -; CHECK-NEXT: [[AND:%.*]] = and i32 [[ARG:%.*]], 3 -; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[AND]]) -; CHECK-NEXT: [[RES:%.*]] = icmp eq i32 [[CNT]], 3 -; CHECK-NEXT: ret i1 [[RES]] +; CHECK-NEXT: ret i1 false ; %and = and i32 %arg, 3 %cnt = call i32 @llvm.ctpop.i32(i32 %and) %res = icmp eq i32 %cnt, 3 ret i1 %res } + +; CHECK: ![[RANGE]] = !{i8 0, i8 9} Index: test/Transforms/InstCombine/intrinsics.ll =================================================================== --- test/Transforms/InstCombine/intrinsics.ll +++ test/Transforms/InstCombine/intrinsics.ll @@ -16,6 +16,8 @@ declare double @llvm.powi.f64(double, i32) nounwind readonly declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone +declare i1 @llvm.cttz.i1(i1, i1) nounwind readnone +declare i1 @llvm.ctlz.i1(i1, i1) nounwind readnone declare i32 @llvm.ctpop.i32(i32) nounwind readnone declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone @@ -293,6 +295,15 @@ ret <2 x i32> %count } +define i1 @cttz_i1(i1 %arg) { +; CHECK-LABEL: @cttz_i1( +; CHECK-NEXT: [[CNT:%.*]] = call i1 @llvm.cttz.i1(i1 [[ARG:%.*]], i1 false) #2 +; CHECK-NEXT: ret i1 [[CNT]] +; + %cnt = call i1 @llvm.cttz.i1(i1 %arg, i1 false) nounwind readnone + ret i1 %cnt +} + define i1 @cttz_knownbits(i32 %arg) { ; CHECK-LABEL: @cttz_knownbits( ; CHECK-NEXT: ret i1 false @@ -316,7 +327,7 @@ define i1 @cttz_knownbits2(i32 %arg) { ; CHECK-LABEL: @cttz_knownbits2( ; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG:%.*]], 4 -; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) +; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) #2, !range ![[CTTZ_RANGE:[0-9]+]] ; CHECK-NEXT: [[RES:%.*]] = icmp eq i32 [[CNT]], 2 ; CHECK-NEXT: ret i1 [[RES]] ; @@ -339,13 +350,9 @@ ret <2 x i1> %res } -; TODO: The icmp is unnecessary given the known bits of the input. define i1 @cttz_knownbits3(i32 %arg) { ; CHECK-LABEL: @cttz_knownbits3( -; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG:%.*]], 4 -; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) #2 -; CHECK-NEXT: [[RES:%.*]] = icmp eq i32 [[CNT]], 3 -; CHECK-NEXT: ret i1 [[RES]] +; CHECK-NEXT: ret i1 false ; %or = or i32 %arg, 4 %cnt = call i32 @llvm.cttz.i32(i32 %or, i1 true) nounwind readnone @@ -387,6 +394,15 @@ ret <2 x i8> %count } +define i1 @ctlz_i1(i1 %arg) { +; CHECK-LABEL: @ctlz_i1( +; CHECK-NEXT: [[CNT:%.*]] = call i1 @llvm.ctlz.i1(i1 [[ARG:%.*]], i1 false) #2 +; CHECK-NEXT: ret i1 [[CNT]] +; + %cnt = call i1 @llvm.ctlz.i1(i1 %arg, i1 false) nounwind readnone + ret i1 %cnt +} + define i1 @ctlz_knownbits(i8 %arg) { ; CHECK-LABEL: @ctlz_knownbits( ; CHECK-NEXT: ret i1 false @@ -410,7 +426,7 @@ define i1 @ctlz_knownbits2(i8 %arg) { ; CHECK-LABEL: @ctlz_knownbits2( ; CHECK-NEXT: [[OR:%.*]] = or i8 [[ARG:%.*]], 32 -; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) +; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) #2, !range ![[CTLZ_RANGE:[0-9]+]] ; CHECK-NEXT: [[RES:%.*]] = icmp eq i8 [[CNT]], 2 ; CHECK-NEXT: ret i1 [[RES]] ; @@ -433,13 +449,9 @@ ret <2 x i1> %res } -; TODO: The icmp is unnecessary given the known bits of the input. define i1 @ctlz_knownbits3(i8 %arg) { ; CHECK-LABEL: @ctlz_knownbits3( -; CHECK-NEXT: [[OR:%.*]] = or i8 [[ARG:%.*]], 32 -; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) #2 -; CHECK-NEXT: [[RES:%.*]] = icmp eq i8 [[CNT]], 3 -; CHECK-NEXT: ret i1 [[RES]] +; CHECK-NEXT: ret i1 false ; %or = or i8 %arg, 32 %cnt = call i8 @llvm.ctlz.i8(i8 %or, i1 true) nounwind readnone @@ -790,3 +802,6 @@ store volatile double %C, double* %P ret void } + +; CHECK: [[CTTZ_RANGE]] = !{i32 0, i32 3} +; CHECK: [[CTLZ_RANGE]] = !{i8 0, i8 3}