Index: llvm/include/llvm/IR/ConstantRange.h =================================================================== --- llvm/include/llvm/IR/ConstantRange.h +++ llvm/include/llvm/IR/ConstantRange.h @@ -526,6 +526,11 @@ /// \p IntMinIsPoison is false. ConstantRange abs(bool IntMinIsPoison = false) const; + /// Calculate absolute value range. If the original range contains zero, then + /// the resulting range will be bounded if and only if \p ZeroIsPoison is + /// false. + ConstantRange ctlz(bool ZeroIsPoison = false) const; + /// Represents whether an operation on the given constant range is known to /// always or never overflow. enum class OverflowResult { Index: llvm/lib/IR/ConstantRange.cpp =================================================================== --- llvm/lib/IR/ConstantRange.cpp +++ llvm/lib/IR/ConstantRange.cpp @@ -945,6 +945,7 @@ case Intrinsic::smin: case Intrinsic::smax: case Intrinsic::abs: + case Intrinsic::ctlz: return true; default: return false; @@ -976,6 +977,12 @@ assert(IntMinIsPoison->getBitWidth() == 1 && "Must be boolean"); return Ops[0].abs(IntMinIsPoison->getBoolValue()); } + case Intrinsic::ctlz: { + const APInt *ZeroIsPoison = Ops[1].getSingleElement(); + assert(ZeroIsPoison && "Must be known (immarg)"); + assert(ZeroIsPoison->getBitWidth() == 1 && "Must be boolean"); + return Ops[0].ctlz(ZeroIsPoison->getBoolValue()); + } default: assert(!isIntrinsicSupported(IntrinsicID) && "Shouldn't be supported"); llvm_unreachable("Unsupported intrinsic"); @@ -1667,6 +1674,68 @@ APIntOps::umax(-SMin, SMax) + 1); } +ConstantRange ConstantRange::ctlz(bool ZeroIsPoison) const { + if (isEmptySet()) + return getEmpty(); + + APInt Zero = APInt::getZero(getBitWidth()); + if (ZeroIsPoison && contains(Zero)) { + // ZeroIsPoison is set, and zero is contained. We discern three cases, in + // which a zero can appear: + // 1) Lower is zero, handling cases of kind [0, 1), [0, 2), etc. + // 2) Upper is zero, wrapped set, handling cases of kind [3, 0], etc. + // 3) Zero contained in a wrapped set, e.g., [3, 2), [3, 1), etc. + + if (getLower().isZero()) { + if ((getUpper() - 1).isZero()) { + // We have in input interval of kind [0, 1). In this case we cannot + // really help but return empty-set. + return getEmpty(); + } + + // Compute the resulting range by excluding zero from Lower. + APInt Lower = APInt(getBitWidth(), (getLower() + 1).countLeadingZeros()); + APInt Upper = APInt(getBitWidth(), (getUpper() - 1).countLeadingZeros()); + if (Lower.ugt(Upper)) + std::swap(Lower, Upper); + ++Upper; + return ConstantRange(std::move(Lower), std::move(Upper)); + } else if ((getUpper() - 1).isZero()) { + // Compute the resulting range by excluding zero from Upper. + APInt Lower = APInt(getBitWidth(), (getLower()).countLeadingZeros()); + ++Lower; + return ConstantRange(Zero, std::move(Lower)); + } else { + return ConstantRange(Zero, APInt(getBitWidth(), getBitWidth())); + } + } + + if (isWrappedSet() || isFullSet()) { + // The range wraps, therefore it includes the two extreme encodings, all + // zeros and all ones. The only way we can express this [0, BitWidth + 1). + ConstantRange Res = + ConstantRange(Zero, APInt(getBitWidth(), (getBitWidth() + 1))); + + // If the returned set wraps to empty-set, return full. + if (Res.isEmptySet()) + return getFull(); + return Res; + } + + // Zero is either safe or not in the range. The output range is composed by + // the result of countLeadingZero of the two extremes, sorted. + APInt Lower = APInt(getBitWidth(), getLower().countLeadingZeros()); + APInt Upper = APInt(getBitWidth(), (getUpper() - 1).countLeadingZeros()); + if (Lower.eq(Upper)) { + return ConstantRange(std::move(Lower), Upper + 1); + } else { + if (Lower.ugt(Upper)) + std::swap(Lower, Upper); + ++Upper; + return ConstantRange(std::move(Lower), std::move(Upper)); + } +} + ConstantRange::OverflowResult ConstantRange::unsignedAddMayOverflow( const ConstantRange &Other) const { if (isEmptySet() || Other.isEmptySet()) Index: llvm/test/Analysis/LazyValueAnalysis/lvi-for-ctlz.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/LazyValueAnalysis/lvi-for-ctlz.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=jump-threading -S | FileCheck %s + +; Ensures that LazyValueInfo correctly propagates the constant range lattice values to ctlz intrinsic. + +; Constant range value may be bounded on [28,31] interval, +; as forward.0 is taken only if %val is within [1,9]. +define i32 @test_ctlz_1(i32 %val) { +; CHECK-LABEL: @test_ctlz_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[VAL:%.*]], -1 +; CHECK-NEXT: [[COND_0:%.*]] = icmp ult i32 [[ADD]], 9 +; CHECK-NEXT: br i1 [[COND_0]], label [[FORWARD_0:%.*]], label [[FORWARD_1:%.*]] +; CHECK: forward.0: +; CHECK-NEXT: [[RET_VAL_0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[VAL]], i1 true), !range [[RNG0:![0-9]+]] +; CHECK-NEXT: ret i32 [[RET_VAL_0]] +; CHECK: forward.1: +; CHECK-NEXT: [[COND_1:%.*]] = icmp ugt i32 [[VAL]], 20 +; CHECK-NEXT: [[RET_VAL_1:%.*]] = select i1 [[COND_1]], i32 10, i32 0 +; CHECK-NEXT: ret i32 [[RET_VAL_1]] +; +entry: + %add = add i32 %val, -1 + %cond.0 = icmp ult i32 %add, 9 + br i1 %cond.0, label %forward.0, label %forward.1 + +forward.0: ; preds = %entry + %ret_val.0 = tail call i32 @llvm.ctlz.i32(i32 %val, i1 true), !range !0 + ret i32 %ret_val.0 + +forward.1: ; preds = %entry + %cond.1 = icmp ugt i32 %val, 20 + %ret_val.1 = select i1 %cond.1, i32 10, i32 0 + ret i32 %ret_val.1 +} + +@g_unsigned_char = global i8 0 + +; Constant range value may be bounded on [23,32] interval, as it takes +; the sum of %val, which is within [0, 10], and g_unsigned_char, which is +; within [0, 255], and ZeroIsPoison is set to false. +define i32 @test_ctlz_2(i32 %val) { +; CHECK-LABEL: @test_ctlz_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND_0:%.*]] = icmp ult i32 [[VAL:%.*]], 10 +; CHECK-NEXT: br i1 [[COND_0]], label [[FORWARD_0:%.*]], label [[FORWARD_1:%.*]] +; CHECK: forward.0: +; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr @g_unsigned_char, align 1 +; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[LOAD]] to i32 +; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[ZEXT]], [[VAL]] +; CHECK-NEXT: [[RES:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[ADD]], i1 false), !range [[RNG0]] +; CHECK-NEXT: br label [[FORWARD_1]] +; CHECK: forward.1: +; CHECK-NEXT: [[RET_VAL:%.*]] = phi i32 [ [[RES]], [[FORWARD_0]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[RET_VAL]] +; +entry: + %cond.0 = icmp ult i32 %val, 10 + br i1 %cond.0, label %forward.0, label %forward.1 + +forward.0: ; preds = %entry + %load = load i8, i8* @g_unsigned_char + %zext = zext i8 %load to i32 + %add = add nuw nsw i32 %zext, %val + %res = tail call i32 @llvm.ctlz.i32(i32 %add, i1 false), !range !0 + br label %forward.1 + +forward.1: ; preds = %entry, %forward.0 + %ret_val = phi i32 [ %res, %forward.0 ], [ 0, %entry ] + ret i32 %ret_val +} + +; Same as test_ctlz_2, this time with ZeroIsPoison set to true. +; Hence, constant range value may be unknown. +define i32 @test_ctlz_3(i32 %val) { +; CHECK-LABEL: @test_ctlz_3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND_0:%.*]] = icmp ult i32 [[VAL:%.*]], 10 +; CHECK-NEXT: br i1 [[COND_0]], label [[FORWARD_0:%.*]], label [[FORWARD_1:%.*]] +; CHECK: forward.0: +; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr @g_unsigned_char, align 1 +; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[LOAD]] to i32 +; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[ZEXT]], [[VAL]] +; CHECK-NEXT: [[RES:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[ADD]], i1 true), !range [[RNG0]] +; CHECK-NEXT: br label [[FORWARD_1]] +; CHECK: forward.1: +; CHECK-NEXT: [[RET_VAL:%.*]] = phi i32 [ [[RES]], [[FORWARD_0]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[RET_VAL]] +; +entry: + %cond.0 = icmp ult i32 %val, 10 + br i1 %cond.0, label %forward.0, label %forward.1 + +forward.0: ; preds = %entry + %load = load i8, i8* @g_unsigned_char + %zext = zext i8 %load to i32 + %add = add nuw nsw i32 %zext, %val + %res = tail call i32 @llvm.ctlz.i32(i32 %add, i1 true), !range !0 + br label %forward.1 + +forward.1: ; preds = %entry, %forward.0 + %ret_val = phi i32 [ %res, %forward.0 ], [ 0, %entry ] + ret i32 %ret_val +} + +declare i32 @llvm.ctlz.i32(i32, i1 immarg) nounwind willreturn + +!0 = !{i32 0, i32 33} Index: llvm/unittests/IR/ConstantRangeTest.cpp =================================================================== --- llvm/unittests/IR/ConstantRangeTest.cpp +++ llvm/unittests/IR/ConstantRangeTest.cpp @@ -2397,6 +2397,21 @@ }); } +TEST_F(ConstantRangeTest, Ctlz) { + TestUnaryOpExhaustive([](const ConstantRange &CR) { return CR.ctlz(); }, + [](const APInt &N) { + return APInt(N.getBitWidth(), N.countLeadingZeros()); + }); + + TestUnaryOpExhaustive( + [](const ConstantRange &CR) { return CR.ctlz(/*ZeroIsPoison=*/true); }, + [](const APInt &N) -> std::optional { + if (N.isZero()) + return std::nullopt; + return APInt(N.getBitWidth(), N.countLeadingZeros()); + }); +} + TEST_F(ConstantRangeTest, castOps) { ConstantRange A(APInt(16, 66), APInt(16, 128)); ConstantRange FpToI8 = A.castOp(Instruction::FPToSI, 8);