diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -475,6 +475,7 @@ SDValue visitTRUNCATE(SDNode *N); SDValue visitBITCAST(SDNode *N); SDValue visitFREEZE(SDNode *N); + SDValue visitVSCALE(SDNode *N); SDValue visitBUILD_PAIR(SDNode *N); SDValue visitFADD(SDNode *N); SDValue visitVP_FADD(SDNode *N); @@ -1974,6 +1975,7 @@ case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); case ISD::FP_TO_BF16: return visitFP_TO_BF16(N); case ISD::FREEZE: return visitFREEZE(N); + case ISD::VSCALE: return visitVSCALE(N); case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_ADD: @@ -14839,6 +14841,22 @@ return CombineConsecutiveLoads(N, VT); } +SDValue DAGCombiner::visitVSCALE(SDNode *N) { + // Constant fold vscale if the min and max value are known. + auto Attr = DAG.getMachineFunction().getFunction().getFnAttribute(Attribute::VScaleRange); + if (!Attr.isValid()) + return SDValue(); + + unsigned VScaleMin = Attr.getVScaleRangeMin(); + std::optional VScaleMax = Attr.getVScaleRangeMax(); + if (!VScaleMax || VScaleMin != VScaleMax) + return SDValue(); + + APInt C = N->getConstantOperandAPInt(0); + C *= VScaleMin; + return DAG.getConstant(C, SDLoc(N), N->getValueType(0)); +} + SDValue DAGCombiner::visitFREEZE(SDNode *N) { SDValue N0 = N->getOperand(0); diff --git a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll --- a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll +++ b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll @@ -209,14 +209,13 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: cntd x8 +; CHECK-NEXT: mov w8, #8 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] -; CHECK-NEXT: subs x8, x8, #8 +; CHECK-NEXT: subs x9, x8, #8 ; CHECK-NEXT: ptrue p1.d, vl8 -; CHECK-NEXT: csel x8, xzr, x8, lo -; CHECK-NEXT: mov w9, #8 -; CHECK-NEXT: cmp x8, #8 -; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: csel x9, xzr, x9, lo +; CHECK-NEXT: cmp x9, #8 +; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: ld1w { z0.d }, p1/z, [x1] diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll --- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll @@ -84,7 +84,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntd x8 +; CHECK-NEXT: mov w8, #16 ; CHECK-NEXT: mov w9, #4 ; CHECK-NEXT: subs x8, x8, #4 ; CHECK-NEXT: ptrue p0.d @@ -154,7 +154,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntw x8 +; CHECK-NEXT: mov w8, #32 ; CHECK-NEXT: mov w9, #8 ; CHECK-NEXT: subs x8, x8, #8 ; CHECK-NEXT: ptrue p0.s @@ -192,7 +192,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntd x8 +; CHECK-NEXT: mov w8, #16 ; CHECK-NEXT: mov w9, #8 ; CHECK-NEXT: subs x8, x8, #8 ; CHECK-NEXT: ptrue p0.d @@ -262,7 +262,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cnth x8 +; CHECK-NEXT: mov w8, #64 ; CHECK-NEXT: mov w9, #16 ; CHECK-NEXT: subs x8, x8, #16 ; CHECK-NEXT: ptrue p0.h @@ -300,7 +300,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntw x8 +; CHECK-NEXT: mov w8, #32 ; CHECK-NEXT: mov w9, #16 ; CHECK-NEXT: subs x8, x8, #16 ; CHECK-NEXT: ptrue p0.s @@ -340,14 +340,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntd x8 -; CHECK-NEXT: mov w9, #16 -; CHECK-NEXT: subs x8, x8, #16 +; CHECK-NEXT: mov w8, #16 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: csel x8, xzr, x8, lo +; CHECK-NEXT: subs x9, x8, #16 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: cmp x8, #16 -; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: csel x9, xzr, x9, lo +; CHECK-NEXT: cmp x9, #16 +; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3] ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s @@ -468,16 +467,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntd x8 -; CHECK-NEXT: mov w9, #2 -; CHECK-NEXT: sub x8, x8, #2 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: cmp x8, #2 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: lsl x8, x8, #3 -; CHECK-NEXT: ldr q0, [x9, x8] +; CHECK-NEXT: ldr q0, [sp, #16] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -490,14 +482,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: mov w10, #4 -; CHECK-NEXT: subs x9, x9, #4 +; CHECK-NEXT: mov w9, #4 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: csel x9, xzr, x9, lo +; CHECK-NEXT: subs x10, x9, #4 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: cmp x9, #4 -; CHECK-NEXT: csel x9, x9, x10, lo +; CHECK-NEXT: csel x10, xzr, x10, lo +; CHECK-NEXT: cmp x10, #4 +; CHECK-NEXT: csel x9, x10, x9, lo ; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x10, x9, lsl #3] ; CHECK-NEXT: st1d { z0.d }, p0, [x8] diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll --- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -374,16 +374,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntd x8 -; CHECK-NEXT: mov w9, #2 -; CHECK-NEXT: sub x8, x8, #2 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: cmp x8, #2 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: lsl x8, x8, #3 -; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: str q1, [sp, #16] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -397,15 +390,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntd x8 +; CHECK-NEXT: mov w8, #4 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: subs x8, x8, #4 +; CHECK-NEXT: subs x9, x8, #4 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0] -; CHECK-NEXT: csel x8, xzr, x8, lo -; CHECK-NEXT: mov w9, #4 -; CHECK-NEXT: cmp x8, #4 +; CHECK-NEXT: csel x9, xzr, x9, lo ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: cmp x9, #4 +; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1d { z1.d }, p0, [x9, x8, lsl #3] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]