Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1125,6 +1125,35 @@
 
   KnownBits Known2;
   switch (Op.getOpcode()) {
+  case ISD::VSCALE: {
+    // Bound the node's value using the function's vscale_range attribute and
+    // the constant multiplier, then record in Known the high bits that are
+    // fixed by that bound. Without a usable maximum nothing is recorded.
+    Function const &F = TLO.DAG.getMachineFunction().getFunction();
+    Attribute const &Attr = F.getFnAttribute(Attribute::VScaleRange);
+    if (!Attr.isValid())
+      return false;
+    std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax();
+    if (!MaxVScale.has_value())
+      return false;
+    APInt Multiplier = Op.getConstantOperandAPInt(0);
+    // The product is formed at a width derived from both factors instead of
+    // at BitWidth, presumably so a narrow result type cannot wrap the bound.
+    // NOTE(review): the signed test below relies on the product leaving the
+    // top bit of Width clear for a non-negative multiplier, which holds when
+    // MaxVScale is a power of two -- confirm vscale_range guarantees that.
+    unsigned Width = Multiplier.getActiveBits() + Log2_32(*MaxVScale) + 1;
+    APInt VScaleResultUpperbound =
+        APInt(Width, *MaxVScale) * Multiplier.sextOrTrunc(Width);
+    bool Negative = VScaleResultUpperbound.isNegative();
+    // For a negative bound, ~x counts the bits below the run of leading ones.
+    if (Negative)
+      VScaleResultUpperbound = ~VScaleResultUpperbound;
+    unsigned RequiredBits = VScaleResultUpperbound.getActiveBits();
+    if (RequiredBits < BitWidth)
+      (Negative ? Known.One : Known.Zero).setHighBits(BitWidth - RequiredBits);
+    return false;
+  }
   case ISD::SCALAR_TO_VECTOR: {
     if (VT.isScalableVector())
       return false;
Index: llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
===================================================================
--- llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
+++ llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
@@ -14,9 +14,8 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
 ; CHECK-NEXT:    lsr x8, x8, #4
-; CHECK-NEXT:    and w9, w8, #0x1f
-; CHECK-NEXT:    and w8, w8, #0xfffffffc
-; CHECK-NEXT:    add w0, w9, w8
+; CHECK-NEXT:    and w9, w8, #0x1c
+; CHECK-NEXT:    add w0, w8, w9
 ; CHECK-NEXT:    ret
   %vscale = call i32 @llvm.vscale.i32()
   %and_redundant = and i32 %vscale, 31
@@ -85,8 +84,7 @@
 ; CHECK-LABEL: vscale_trunc_zext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
-; CHECK-NEXT:    lsr x8, x8, #4
-; CHECK-NEXT:    and x0, x8, #0xffffffff
+; CHECK-NEXT:    lsr x0, x8, #4
 ; CHECK-NEXT:    ret
   %vscale = call i32 @llvm.vscale.i32()
   %zext = zext i32 %vscale to i64
@@ -97,8 +95,7 @@
 ; CHECK-LABEL: vscale_trunc_sext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
-; CHECK-NEXT:    lsr x8, x8, #4
-; CHECK-NEXT:    sxtw x0, w8
+; CHECK-NEXT:    lsr x0, x8, #4
 ; CHECK-NEXT:    ret
   %vscale = call i32 @llvm.vscale.i32()
   %sext = sext i32 %vscale to i64
@@ -200,9 +197,8 @@
 ; CHECK-NEXT:    mov w9, #5
 ; CHECK-NEXT:    lsr x8, x8, #4
 ; CHECK-NEXT:    mul x8, x8, x9
-; CHECK-NEXT:    and w9, w8, #0x7f
-; CHECK-NEXT:    and w8, w8, #0x3f
-; CHECK-NEXT:    add w0, w9, w8
+; CHECK-NEXT:    and w9, w8, #0x3f
+; CHECK-NEXT:    add w0, w8, w9
 ; CHECK-NEXT:    ret
   %vscale = call i32 @llvm.vscale.i32()
   %mul = mul i32 %vscale, 5
@@ -219,9 +215,8 @@
 ; CHECK-NEXT:    mov x9, #-5
 ; CHECK-NEXT:    lsr x8, x8, #4
 ; CHECK-NEXT:    mul x8, x8, x9
-; CHECK-NEXT:    orr w9, w8, #0xffffff80
-; CHECK-NEXT:    and w8, w8, #0xffffffc0
-; CHECK-NEXT:    add w0, w9, w8
+; CHECK-NEXT:    and w9, w8, #0xffffffc0
+; CHECK-NEXT:    add w0, w8, w9
 ; CHECK-NEXT:    ret
   %vscale = call i32 @llvm.vscale.i32()
   %mul = mul i32 %vscale, -5
@@ -231,6 +226,22 @@
   ret i32 %result
 }
 
+define i32 @pow2_vscale_with_negative_multiplier() vscale_range(1,16) {
+; CHECK-LABEL: pow2_vscale_with_negative_multiplier:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntd x8
+; CHECK-NEXT:    neg x8, x8
+; CHECK-NEXT:    orr w9, w8, #0xfffffff0
+; CHECK-NEXT:    add w0, w8, w9
+; CHECK-NEXT:    ret
+  %vscale = call i32 @llvm.vscale.i32()
+  %mul = mul i32 %vscale, -2
+  %or_redundant = or i32 %mul, 4294967264
+  %or_required = or i32 %mul, 4294967280
+  %result = add i32 %or_redundant, %or_required
+  ret i32 %result
+}
+
 declare i32 @llvm.vscale.i32()
 declare i64 @llvm.aarch64.sve.cntb(i32 %pattern)
 declare i64 @llvm.aarch64.sve.cnth(i32 %pattern)
Index: llvm/test/CodeGen/RISCV/vscale-demanded-bits.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/vscale-demanded-bits.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv64 -mattr +v -filetype asm -o - %s | FileCheck %s
+
+declare i8 @llvm.vscale.i8()
+declare <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
+
+define <vscale x 8 x i8> @f() #0 {
+; CHECK-LABEL: f:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i8 @llvm.vscale.i8()
+  %1 = shl i8 %0, 3
+  %.splat.insert = insertelement <vscale x 8 x i8> poison, i8 %1, i64 0
+  %.splat = shufflevector <vscale x 8 x i8> %.splat.insert, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %2 = tail call <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
+  %3 = add <vscale x 8 x i8> %2, %.splat
+  ret <vscale x 8 x i8> %3
+}
+
+attributes #0 = { vscale_range(2,1024) }