diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1059,12 +1059,8 @@
   }
 
   /// Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
-  SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm) {
-    assert(MulImm.getSignificantBits() <= VT.getSizeInBits() &&
-           "Immediate does not fit VT");
-    return getNode(ISD::VSCALE, DL, VT,
-                   getConstant(MulImm.sextOrTrunc(VT.getSizeInBits()), DL, VT));
-  }
+  SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm,
+                    bool ConstantFold = true);
 
   /// Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
   SDValue getGLOBAL_OFFSET_TABLE(EVT VT) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1935,6 +1935,27 @@
   return SDValue(CondCodeNodes[Cond], 0);
 }
 
+SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm,
+                                bool ConstantFold) {
+  assert(MulImm.getSignificantBits() <= VT.getSizeInBits() &&
+         "Immediate does not fit VT");
+
+  MulImm = MulImm.sextOrTrunc(VT.getSizeInBits());
+
+  if (ConstantFold) {
+    const MachineFunction &MF = getMachineFunction();
+    auto Attr = MF.getFunction().getFnAttribute(Attribute::VScaleRange);
+    if (Attr.isValid()) {
+      unsigned VScaleMin = Attr.getVScaleRangeMin();
+      if (std::optional<unsigned> VScaleMax = Attr.getVScaleRangeMax())
+        if (*VScaleMax == VScaleMin)
+          return getConstant(MulImm * VScaleMin, DL, VT);
+    }
+  }
+
+  return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT));
+}
+
 SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) {
   APInt One(ResVT.getScalarSizeInBits(), 1);
   return getStepVector(DL, ResVT, One);
diff --git a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
--- a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
@@ -209,18 +209,11 @@
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
-; CHECK-NEXT:    subs x8, x8, #8
 ; CHECK-NEXT:    ptrue p1.d, vl8
-; CHECK-NEXT:    csel x8, xzr, x8, lo
-; CHECK-NEXT:    mov w9, #8
-; CHECK-NEXT:    cmp x8, #8
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
 ; CHECK-NEXT:    ld1w { z0.d }, p1/z, [x1]
-; CHECK-NEXT:    st1d { z0.d }, p0, [x9, x8, lsl #3]
+; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
@@ -84,16 +84,11 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    mov w9, #4
-; CHECK-NEXT:    subs x8, x8, #4
+; CHECK-NEXT:    mov x8, #4
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    csel x8, xzr, x8, lo
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    cmp x8, #4
 ; CHECK-NEXT:    ptrue p0.d, vl4
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
@@ -154,16 +149,11 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cntw x8
-; CHECK-NEXT:    mov w9, #8
-; CHECK-NEXT:    subs x8, x8, #8
+; CHECK-NEXT:    mov x8, #8
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    csel x8, xzr, x8, lo
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    cmp x8, #8
 ; CHECK-NEXT:    ptrue p0.s, vl8
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
@@ -192,16 +182,11 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    mov w9, #8
-; CHECK-NEXT:    subs x8, x8, #8
+; CHECK-NEXT:    mov x8, #8
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    csel x8, xzr, x8, lo
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    cmp x8, #8
 ; CHECK-NEXT:    ptrue p0.d, vl8
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
@@ -262,16 +247,11 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cnth x8
-; CHECK-NEXT:    mov w9, #16
-; CHECK-NEXT:    subs x8, x8, #16
+; CHECK-NEXT:    mov x8, #16
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    csel x8, xzr, x8, lo
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    cmp x8, #16
 ; CHECK-NEXT:    ptrue p0.h, vl16
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x9, x8, lsl #1]
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
@@ -300,16 +280,11 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cntw x8
-; CHECK-NEXT:    mov w9, #16
-; CHECK-NEXT:    subs x8, x8, #16
+; CHECK-NEXT:    mov x8, #16
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    csel x8, xzr, x8, lo
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    cmp x8, #16
 ; CHECK-NEXT:    ptrue p0.s, vl16
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
@@ -340,16 +315,9 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    mov w9, #16
-; CHECK-NEXT:    subs x8, x8, #16
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    csel x8, xzr, x8, lo
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    cmp x8, #16
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
@@ -468,16 +436,9 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    mov w9, #2
-; CHECK-NEXT:    sub x8, x8, #2
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    cmp x8, #2
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    lsl x8, x8, #3
-; CHECK-NEXT:    ldr q0, [x9, x8]
+; CHECK-NEXT:    ldr q0, [sp, #16]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -490,16 +451,9 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cntd x9
-; CHECK-NEXT:    mov w10, #4
-; CHECK-NEXT:    subs x9, x9, #4
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    csel x9, xzr, x9, lo
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    cmp x9, #4
-; CHECK-NEXT:    csel x9, x9, x10, lo
-; CHECK-NEXT:    mov x10, sp
-; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x10, x9, lsl #3]
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -374,16 +374,9 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    mov w9, #2
-; CHECK-NEXT:    sub x8, x8, #2
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    cmp x8, #2
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    lsl x8, x8, #3
-; CHECK-NEXT:    str q1, [x9, x8]
+; CHECK-NEXT:    str q1, [sp, #16]
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -397,17 +390,10 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    cntd x8
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    subs x8, x8, #4
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
-; CHECK-NEXT:    csel x8, xzr, x8, lo
-; CHECK-NEXT:    mov w9, #4
-; CHECK-NEXT:    cmp x8, #4
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    st1d { z1.d }, p0, [x9, x8, lsl #3]
+; CHECK-NEXT:    st1d { z1.d }, p0, [sp]
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
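
Note (reviewer sketch, not part of the patch): a minimal IR example of the fold the new ConstantFold path enables. When the enclosing function carries a vscale_range attribute whose minimum and maximum agree, getVScale now returns a plain constant instead of an ISD::VSCALE node, which is why the cntd/subs/csel clamping sequences in the tests above collapse into immediate offsets. The function name below is hypothetical and the example assumes a fixed-width attribute such as vscale_range(2,2), the same kind of attribute the affected tests rely on.

; Hypothetical standalone example (assumes a target with scalable vectors,
; e.g. AArch64 with +sve). With vscale pinned to 2 by vscale_range(2,2),
; SelectionDAG folds the llvm.vscale call to the constant 2, so this
; function lowers to returning the compile-time constant 8.
define i64 @scaled_vl() vscale_range(2,2) {
  %vs = call i64 @llvm.vscale.i64()
  %n = mul i64 %vs, 4
  ret i64 %n
}

declare i64 @llvm.vscale.i64()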