Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1125,6 +1125,26 @@
 
   KnownBits Known2;
   switch (Op.getOpcode()) {
+  case ISD::VSCALE: {
+    Function const &F = TLO.DAG.getMachineFunction().getFunction();
+    Attribute const &Attr = F.getFnAttribute(Attribute::VScaleRange);
+    if (!Attr.isValid())
+      return false;
+    std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax();
+    if (!MaxVScale.has_value())
+      return false;
+    APInt Multiplier = Op.getConstantOperandAPInt(0);
+    unsigned Width = Multiplier.getActiveBits() + Log2_32(*MaxVScale) + 1;
+    APInt VScaleResultUpperbound =
+        APInt(Width, *MaxVScale) * Multiplier.sextOrTrunc(Width);
+    bool Negative = VScaleResultUpperbound.isNegative();
+    if (Negative)
+      VScaleResultUpperbound = ~VScaleResultUpperbound;
+    unsigned RequiredBits = VScaleResultUpperbound.getActiveBits();
+    if (RequiredBits < BitWidth)
+      (Negative ? Known.One : Known.Zero).setHighBits(BitWidth - RequiredBits);
+    return false;
+  }
   case ISD::SCALAR_TO_VECTOR: {
     if (VT.isScalableVector())
       return false;
Index: llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
===================================================================
--- llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
+++ llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
@@ -14,9 +14,8 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
 ; CHECK-NEXT:    lsr x8, x8, #4
-; CHECK-NEXT:    and w9, w8, #0x1f
-; CHECK-NEXT:    and w8, w8, #0xfffffffc
-; CHECK-NEXT:    add w0, w9, w8
+; CHECK-NEXT:    and w9, w8, #0x1c
+; CHECK-NEXT:    add w0, w8, w9
 ; CHECK-NEXT:    ret
   %vscale = call i32 @llvm.vscale.i32()
   %and_redundant = and i32 %vscale, 31
@@ -85,8 +84,7 @@
 ; CHECK-LABEL: vscale_trunc_zext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
-; CHECK-NEXT:    lsr x8, x8, #4
-; CHECK-NEXT:    and x0, x8, #0xffffffff
+; CHECK-NEXT:    lsr x0, x8, #4
 ; CHECK-NEXT:    ret
   %vscale = call i32 @llvm.vscale.i32()
   %zext = zext i32 %vscale to i64
@@ -97,8 +95,7 @@
 ; CHECK-LABEL: vscale_trunc_sext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
-; CHECK-NEXT:    lsr x8, x8, #4
-; CHECK-NEXT:    sxtw x0, w8
+; CHECK-NEXT:    lsr x0, x8, #4
 ; CHECK-NEXT:    ret
   %vscale = call i32 @llvm.vscale.i32()
   %sext = sext i32 %vscale to i64
@@ -200,9 +197,8 @@
 ; CHECK-NEXT:    mov w9, #5
 ; CHECK-NEXT:    lsr x8, x8, #4
 ; CHECK-NEXT:    mul x8, x8, x9
-; CHECK-NEXT:    and w9, w8, #0x7f
-; CHECK-NEXT:    and w8, w8, #0x3f
-; CHECK-NEXT:    add w0, w9, w8
+; CHECK-NEXT:    and w9, w8, #0x3f
+; CHECK-NEXT:    add w0, w8, w9
 ; CHECK-NEXT:    ret
   %vscale = call i32 @llvm.vscale.i32()
   %mul = mul i32 %vscale, 5
@@ -219,9 +215,8 @@
 ; CHECK-NEXT:    mov x9, #-5
 ; CHECK-NEXT:    lsr x8, x8, #4
 ; CHECK-NEXT:    mul x8, x8, x9
-; CHECK-NEXT:    orr w9, w8, #0xffffff80
-; CHECK-NEXT:    and w8, w8, #0xffffffc0
-; CHECK-NEXT:    add w0, w9, w8
+; CHECK-NEXT:    and w9, w8, #0xffffffc0
+; CHECK-NEXT:    add w0, w8, w9
 ; CHECK-NEXT:    ret
   %vscale = call i32 @llvm.vscale.i32()
   %mul = mul i32 %vscale, -5
@@ -231,6 +226,22 @@
   ret i32 %result
 }
 
+define i32 @pow2_vscale_with_negative_multiplier() vscale_range(1,16) {
+; CHECK-LABEL: pow2_vscale_with_negative_multiplier:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntd x8
+; CHECK-NEXT:    neg x8, x8
+; CHECK-NEXT:    orr w9, w8, #0xfffffff0
+; CHECK-NEXT:    add w0, w8, w9
+; CHECK-NEXT:    ret
+  %vscale = call i32 @llvm.vscale.i32()
+  %mul = mul i32 %vscale, -2
+  %or_redundant = or i32 %mul, 4294967264
+  %or_required = or i32 %mul, 4294967280
+  %result = add i32 %or_redundant, %or_required
+  ret i32 %result
+}
+
 declare i32 @llvm.vscale.i32()
 declare i64 @llvm.aarch64.sve.cntb(i32 %pattern)
 declare i64 @llvm.aarch64.sve.cnth(i32 %pattern)
Index: llvm/test/CodeGen/RISCV/vscale-demanded-bits.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/vscale-demanded-bits.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv64 -mattr +v -filetype asm -o - %s | FileCheck %s
+
+define dso_local void @f(ptr nocapture noundef writeonly %buffer, i32 noundef signext %n) local_unnamed_addr #0 {
+; CHECK-LABEL: f:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a2, 0
+; CHECK-NEXT:    slli a1, a1, 32
+; CHECK-NEXT:    srli a1, a1, 32
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    add a4, a3, a1
+; CHECK-NEXT:    addi a4, a4, -1
+; CHECK-NEXT:    neg a5, a3
+; CHECK-NEXT:    and a4, a4, a5
+; CHECK-NEXT:    vsetvli a5, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:  .LBB0_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vx v24, v16, a2
+; CHECK-NEXT:    vmsltu.vx v0, v24, a1
+; CHECK-NEXT:    add a5, a0, a2
+; CHECK-NEXT:    vse8.v v8, (a5), v0.t
+; CHECK-NEXT:    add a2, a2, a3
+; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vadd.vx v8, v8, a3
+; CHECK-NEXT:    bne a4, a2, .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %exit
+; CHECK-NEXT:    ret
+entry:
+  %wide.trip.count = zext i32 %n to i64
+  %0 = tail call i64 @llvm.vscale.i64()
+  %1 = shl nuw nsw i64 %0, 3
+  %2 = add nsw i64 %1, -1
+  %n.rnd.up = add nsw i64 %2, %wide.trip.count
+  %n.mod.vf = urem i64 %n.rnd.up, %1
+  %n.vec = sub nsw i64 %n.rnd.up, %n.mod.vf
+  %3 = tail call <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
+  %4 = tail call i8 @llvm.vscale.i8()
+  %5 = shl i8 %4, 3
+  %.splatinsert = insertelement <vscale x 8 x i8> poison, i8 %5, i64 0
+  %.splat = shufflevector <vscale x 8 x i8> %.splatinsert, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %vec.ind = phi <vscale x 8 x i8> [ %3, %entry ], [ %vec.ind.next, %vector.body ]
+  %active.lane.mask = tail call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 %index, i64 %wide.trip.count)
+  %6 = getelementptr inbounds i8, ptr %buffer, i64 %index
+  tail call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> %vec.ind, ptr %6, i32 1, <vscale x 8 x i1> %active.lane.mask)
+  %index.next = add i64 %index, %1
+  %vec.ind.next = add <vscale x 8 x i8> %vec.ind, %.splat
+  %7 = icmp eq i64 %index.next, %n.vec
+  br i1 %7, label %exit, label %vector.body
+
+exit:
+  ret void
+}
+
+declare i64 @llvm.vscale.i64()
+
+declare <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
+
+declare i8 @llvm.vscale.i8()
+
+declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64, i64)
+
+declare void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8>, ptr nocapture, i32 immarg, <vscale x 8 x i1>)
+
+attributes #0 = { vscale_range(2,1024) }