Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5034,6 +5034,8 @@
     }
     if (OpOpcode == ISD::UNDEF)
       return getUNDEF(VT);
+    if (OpOpcode == ISD::VSCALE && !NewNodesMustHaveLegalTypes)
+      return getVScale(DL, VT, Operand.getConstantOperandAPInt(0));
     break;
   case ISD::ANY_EXTEND_VECTOR_INREG:
   case ISD::ZERO_EXTEND_VECTOR_INREG:
Index: llvm/lib/Target/AArch64/AArch64.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64.td
+++ llvm/lib/Target/AArch64/AArch64.td
@@ -129,8 +129,12 @@
                        "merged with destructive operations",
                        []>;
 
+def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl",
+  "UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">;
+
 def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true",
-  "Enable Scalable Vector Extension 2 (SVE2) instructions", [FeatureSVE]>;
+  "Enable Scalable Vector Extension 2 (SVE2) instructions",
+  [FeatureSVE, FeatureUseScalarIncVL]>;
 
 def FeatureSVE2AES : SubtargetFeature<"sve2-aes", "HasSVE2AES", "true",
   "Enable AES SVE2 instructions", [FeatureSVE2, FeatureAES]>;
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -195,6 +195,8 @@
     : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
                                              "NegativeImmediates">;
 
+def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
+
 def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
                                  SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
                                                       SDTCisInt<1>]>>;
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1452,16 +1452,18 @@
   defm CNTW_XPiI : sve_int_count<0b100, "cntw", int_aarch64_sve_cntw>;
   defm CNTD_XPiI : sve_int_count<0b110, "cntd", int_aarch64_sve_cntd>;
   defm CNTP_XPP : sve_int_pcount_pred<0b0000, "cntp", int_aarch64_sve_cntp>;
+}
 
-  defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb">;
-  defm DECB_XPiI : sve_int_pred_pattern_a<0b001, "decb">;
-  defm INCH_XPiI : sve_int_pred_pattern_a<0b010, "inch">;
-  defm DECH_XPiI : sve_int_pred_pattern_a<0b011, "dech">;
-  defm INCW_XPiI : sve_int_pred_pattern_a<0b100, "incw">;
-  defm DECW_XPiI : sve_int_pred_pattern_a<0b101, "decw">;
-  defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd">;
-  defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd">;
+  defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb", add, int_aarch64_sve_cntb>;
+  defm DECB_XPiI : sve_int_pred_pattern_a<0b001, "decb", sub, int_aarch64_sve_cntb>;
+  defm INCH_XPiI : sve_int_pred_pattern_a<0b010, "inch", add, int_aarch64_sve_cnth>;
+  defm DECH_XPiI : sve_int_pred_pattern_a<0b011, "dech", sub, int_aarch64_sve_cnth>;
+  defm INCW_XPiI : sve_int_pred_pattern_a<0b100, "incw", add, int_aarch64_sve_cntw>;
+  defm DECW_XPiI : sve_int_pred_pattern_a<0b101, "decw", sub, int_aarch64_sve_cntw>;
+  defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd", add, int_aarch64_sve_cntd>;
+  defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd", sub, int_aarch64_sve_cntd>;
 
+let Predicates = [HasSVEorStreamingSVE] in {
   defm SQINCB_XPiWdI : sve_int_pred_pattern_b_s32<0b00000, "sqincb", int_aarch64_sve_sqincb_n32>;
   defm UQINCB_WPiI : sve_int_pred_pattern_b_u32<0b00001, "uqincb", int_aarch64_sve_uqincb_n32>;
  defm SQDECB_XPiWdI : sve_int_pred_pattern_b_s32<0b00010, "sqdecb", int_aarch64_sve_sqdecb_n32>;
@@ -1893,6 +1895,72 @@
   def : Pat<(vscale (sve_cntd_imm_neg i32:$imm)),
             (SUBXrs XZR, (CNTD_XPiI 31, $imm), 0)>;
 }
 
+  let AddedComplexity = 5 in {
+    def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))),
+              (ADDVL_XXI GPR64:$op, $imm)>;
+
+    def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_rdvl_imm i32:$imm))))),
+              (i32 (EXTRACT_SUBREG (ADDVL_XXI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                   GPR32:$op, sub_32), $imm),
+                                   sub_32))>;
+
+    def : Pat<(nxv8i16 (add ZPR:$op, (nxv8i16 (AArch64dup (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))),
+              (INCH_ZPiI ZPR:$op, 31, $imm)>;
+    def : Pat<(nxv4i32 (add ZPR:$op, (nxv4i32 (AArch64dup (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))),
+              (INCW_ZPiI ZPR:$op, 31, $imm)>;
+    def : Pat<(nxv2i64 (add ZPR:$op, (nxv2i64 (AArch64dup (i64 (vscale (sve_cntd_imm i32:$imm))))))),
+              (INCD_ZPiI ZPR:$op, 31, $imm)>;
+
+    def : Pat<(nxv8i16 (sub ZPR:$op, (nxv8i16 (AArch64dup (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))),
+              (DECH_ZPiI ZPR:$op, 31, $imm)>;
+    def : Pat<(nxv4i32 (sub ZPR:$op, (nxv4i32 (AArch64dup (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))),
+              (DECW_ZPiI ZPR:$op, 31, $imm)>;
+    def : Pat<(nxv2i64 (sub ZPR:$op, (nxv2i64 (AArch64dup (i64 (vscale (sve_cntd_imm i32:$imm))))))),
+              (DECD_ZPiI ZPR:$op, 31, $imm)>;
+  }
+
+  let Predicates = [HasSVE, UseScalarIncVL], AddedComplexity = 5 in {
+    def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm i32:$imm))),
+              (INCH_XPiI GPR64:$op, 31, $imm)>;
+    def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm i32:$imm))),
+              (INCW_XPiI GPR64:$op, 31, $imm)>;
+    def : Pat<(add GPR64:$op, (vscale (sve_cntd_imm i32:$imm))),
+              (INCD_XPiI GPR64:$op, 31, $imm)>;
+
+    def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm_neg i32:$imm))),
+              (DECH_XPiI GPR64:$op, 31, $imm)>;
+    def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm_neg i32:$imm))),
+              (DECW_XPiI GPR64:$op, 31, $imm)>;
+    def : Pat<(add GPR64:$op, (vscale (sve_cntd_imm_neg i32:$imm))),
+              (DECD_XPiI GPR64:$op, 31, $imm)>;
+
+    def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cnth_imm i32:$imm))))),
+              (i32 (EXTRACT_SUBREG (INCH_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                   GPR32:$op, sub_32), 31, $imm),
+                                   sub_32))>;
+    def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntw_imm i32:$imm))))),
+              (i32 (EXTRACT_SUBREG (INCW_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                   GPR32:$op, sub_32), 31, $imm),
+                                   sub_32))>;
+    def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntd_imm i32:$imm))))),
+              (i32 (EXTRACT_SUBREG (INCD_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                   GPR32:$op, sub_32), 31, $imm),
+                                   sub_32))>;
+
+    def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cnth_imm_neg i32:$imm))))),
+              (i32 (EXTRACT_SUBREG (DECH_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                   GPR32:$op, sub_32), 31, $imm),
+                                   sub_32))>;
+    def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntw_imm_neg i32:$imm))))),
+              (i32 (EXTRACT_SUBREG (DECW_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                   GPR32:$op, sub_32), 31, $imm),
+                                   sub_32))>;
+    def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntd_imm_neg i32:$imm))))),
+              (i32 (EXTRACT_SUBREG (DECD_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                   GPR32:$op, sub_32), 31, $imm),
+                                   sub_32))>;
+  }
+
   def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))),
             (ADDVL_XXI GPR64:$op, $imm)>;
Index: llvm/lib/Target/AArch64/AArch64Subtarget.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -120,6 +120,7 @@
   // SVE extensions
   bool HasSVE = false;
   bool UseExperimentalZeroingPseudos = false;
+  bool UseScalarIncVL = false;
 
   // Armv8.2 Crypto extensions
   bool HasSM4 = false;
@@ -451,6 +452,8 @@
     return UseExperimentalZeroingPseudos;
   }
 
+  bool useScalarIncVL() const { return UseScalarIncVL; }
+
   /// CPU has TBI (top byte of addresses is ignored during HW address
   /// translation) and OS enables it.
   bool supportsAddressTopByteIgnored() const;
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -920,13 +920,43 @@
   let Constraints = "$Rdn = $_Rdn";
 }
 
-multiclass sve_int_pred_pattern_a<bits<3> opc, string asm> {
-  def NAME : sve_int_pred_pattern_a<opc, asm>;
+multiclass sve_int_pred_pattern_a<bits<3> opc, string asm,
+                                  SDPatternOperator op,
+                                  SDPatternOperator opcnt> {
+  let Predicates = [HasSVEorStreamingSVE] in {
+    def NAME : sve_int_pred_pattern_a<opc, asm>;
+
+    def : InstAlias<asm # "\t$Rdn, $pattern",
+                    (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1), 1>;
+    def : InstAlias<asm # "\t$Rdn",
+                    (!cast<Instruction>(NAME) GPR64:$Rdn, 0b11111, 1), 2>;
+  }
 
-  def : InstAlias<asm # "\t$Rdn, $pattern",
-                  (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1), 1>;
-  def : InstAlias<asm # "\t$Rdn",
-                  (!cast<Instruction>(NAME) GPR64:$Rdn, 0b11111, 1), 2>;
+  let Predicates = [HasSVEorStreamingSVE, UseScalarIncVL] in {
+    def : Pat<(i64 (op GPR64:$Rdn, (opcnt sve_pred_enum:$pattern))),
+              (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1)>;
+
+    def : Pat<(i64 (op GPR64:$Rdn, (mul (opcnt sve_pred_enum:$pattern), (sve_cnt_mul_imm i32:$imm)))),
+              (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, $imm)>;
+
+    def : Pat<(i64 (op GPR64:$Rdn, (shl (opcnt sve_pred_enum:$pattern), (i64 (sve_cnt_shl_imm i32:$imm))))),
+              (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, $imm)>;
+
+    def : Pat<(i32 (op GPR32:$Rdn, (i32 (trunc (opcnt (sve_pred_enum:$pattern)))))),
+              (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                   GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, 1),
+                                   sub_32))>;
+
+    def : Pat<(i32 (op GPR32:$Rdn, (mul (i32 (trunc (opcnt (sve_pred_enum:$pattern)))), (sve_cnt_mul_imm i32:$imm)))),
+              (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                   GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, $imm),
+                                   sub_32))>;
+
+    def : Pat<(i32 (op GPR32:$Rdn, (shl (i32 (trunc (opcnt (sve_pred_enum:$pattern)))), (i64 (sve_cnt_shl_imm i32:$imm))))),
+              (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                   GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, $imm),
+                                   sub_32))>;
+  }
 }
 
 class sve_int_pred_pattern_b<bits<5> opc, string asm, RegisterOperand dt,
Index: llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
===================================================================
--- llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
+++ llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
@@ -39,15 +39,15 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]!
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: rdvl x8, #1 +; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: mov w10, #256 -; CHECK-NEXT: cmp x8, #256 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: csel x8, x8, x10, lo ; CHECK-NEXT: st1b { z0.b }, p0, [sp] +; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: st1b { z1.b }, p0, [x9, #1, mul vl] +; CHECK-NEXT: cmp x8, #256 +; CHECK-NEXT: csel x8, x8, x10, lo ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -622,23 +622,23 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-8 -; CHECK-NEXT: rdvl x9, #1 +; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov w10, #16 -; CHECK-NEXT: sub x9, x9, #1 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: cmp x9, #16 +; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: csel x9, x9, x10, lo -; CHECK-NEXT: st1w { z3.s }, p0, [x8, #3, mul vl] -; CHECK-NEXT: st1w { z2.s }, p0, [x8, #2, mul vl] -; CHECK-NEXT: add x10, x8, x9, lsl #2 -; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] +; CHECK-NEXT: addvl x8, x8, #1 +; CHECK-NEXT: cmp x8, #16 +; CHECK-NEXT: st1w { z3.s }, p0, [x9, #3, mul vl] +; CHECK-NEXT: csel x8, x8, x10, lo +; CHECK-NEXT: st1w { z2.s }, p0, [x9, #2, mul vl] +; CHECK-NEXT: st1w { z1.s }, p0, [x9, #1, mul vl] ; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: st1w { z7.s }, p0, [x8, #7, mul vl] -; CHECK-NEXT: st1w { z4.s }, p0, [x8, #4, mul vl] -; CHECK-NEXT: st1w { z5.s }, p0, [x8, #5, mul vl] -; CHECK-NEXT: st1w { z6.s }, p0, [x8, #6, mul vl] -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2] +; CHECK-NEXT: add x10, x9, x8, lsl #2 +; CHECK-NEXT: st1w { z7.s }, p0, [x9, #7, mul vl] +; CHECK-NEXT: st1w { z4.s }, p0, [x9, #4, mul vl] +; CHECK-NEXT: st1w { z5.s }, p0, [x9, #5, mul vl] +; CHECK-NEXT: st1w { z6.s }, p0, [x9, #6, mul vl] +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x10, #1, mul vl] ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x10, #2, mul vl] ; CHECK-NEXT: ld1w { z3.s }, p0/z, [x10, #3, mul vl] Index: llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll +++ llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll @@ -116,12 +116,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: rdvl x8, #1 +; CHECK-NEXT: mov x8, #-16 ; CHECK-NEXT: mov w9, #16 -; CHECK-NEXT: sub x8, x8, #16 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: cmp x8, #16 ; CHECK-NEXT: st1b { z0.b }, p0, [sp] +; CHECK-NEXT: addvl x8, x8, #1 +; CHECK-NEXT: cmp x8, #16 ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: ldr q0, [x9, x8] Index: llvm/test/CodeGen/AArch64/sve-gep.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-gep.ll +++ llvm/test/CodeGen/AArch64/sve-gep.ll @@ -202,10 +202,8 @@ define *> @scalable_of_scalable_1(* %base) { ; CHECK-LABEL: scalable_of_scalable_1: ; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #1 -; CHECK-NEXT: mov z1.d, x0 -; CHECK-NEXT: mov z0.d, x8 -; CHECK-NEXT: add z0.d, z1.d, z0.d +; CHECK-NEXT: mov z0.d, x0 +; CHECK-NEXT: incd z0.d, all, mul #8 ; CHECK-NEXT: ret %idx = shufflevector insertelement ( undef, i64 1, i32 0), zeroinitializer, zeroinitializer %d = getelementptr , * %base, %idx @@ -215,9 +213,7 @@ define *> @scalable_of_scalable_2(*> %base) { ; CHECK-LABEL: scalable_of_scalable_2: ; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #1 -; CHECK-NEXT: mov z1.d, x8 -; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: incd z0.d, all, mul #8 ; CHECK-NEXT: ret %idx = shufflevector insertelement ( undef, i64 1, i32 0), zeroinitializer, zeroinitializer %d = getelementptr , *> %base, %idx Index: llvm/test/CodeGen/AArch64/sve-insert-element.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-insert-element.ll +++ llvm/test/CodeGen/AArch64/sve-insert-element.ll @@ -503,16 +503,16 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: rdvl x8, #2 +; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: sxtw x9, w1 -; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: mov x10, sp -; CHECK-NEXT: cmp x9, x8 ; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1 -; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: addvl x8, x8, #2 +; CHECK-NEXT: cmp x9, x8 ; CHECK-NEXT: st1b { z0.b }, p1, [x10, #1, mul vl] +; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 ; CHECK-NEXT: st1b { z0.b }, p1, [sp] ; CHECK-NEXT: strb w0, [x10, x8] Index: llvm/test/CodeGen/AArch64/sve-insert-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -139,12 +139,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: rdvl x8, #1 +; CHECK-NEXT: mov x8, #-16 ; CHECK-NEXT: mov w9, #16 -; CHECK-NEXT: sub x8, x8, #16 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: cmp x8, #16 ; CHECK-NEXT: st1b { z0.b }, p0, [sp] +; CHECK-NEXT: addvl x8, x8, #1 +; CHECK-NEXT: cmp x8, #16 ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: str q1, [x9, x8] Index: llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll +++ llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll @@ -1,4 +1,6 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-scalar-inc-vl < %s | FileCheck %s -check-prefix=USE_SCALAR_INC ; ; CNTB @@ -6,16 +8,28 @@ define i64 @cntb() { ; CHECK-LABEL: cntb: -; CHECK: cntb x0, vl2 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cntb x0, vl2 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: cntb: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: cntb x0, vl2 +; USE_SCALAR_INC-NEXT: ret %out = call i64 @llvm.aarch64.sve.cntb(i32 2) ret i64 %out } define i64 @cntb_mul3() { ; CHECK-LABEL: cntb_mul3: -; CHECK: cntb x0, vl6, mul #3 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cntb x0, vl6, mul #3 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: cntb_mul3: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: cntb x0, vl6, mul #3 +; USE_SCALAR_INC-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cntb(i32 6) %out = mul i64 %cnt, 3 ret i64 %out @@ -23,8 +37,14 @@ define i64 @cntb_mul4() { ; CHECK-LABEL: cntb_mul4: -; CHECK: cntb x0, vl8, mul #4 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cntb x0, vl8, mul #4 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: cntb_mul4: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: cntb x0, vl8, mul #4 +; USE_SCALAR_INC-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cntb(i32 8) %out = mul i64 %cnt, 4 ret i64 %out @@ -36,16 +56,28 @@ define i64 @cnth() { ; CHECK-LABEL: cnth: -; CHECK: cnth x0, vl3 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cnth x0, vl3 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: cnth: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: cnth x0, vl3 +; USE_SCALAR_INC-NEXT: ret %out = call i64 @llvm.aarch64.sve.cnth(i32 3) ret i64 %out } define i64 @cnth_mul5() { ; CHECK-LABEL: cnth_mul5: -; CHECK: cnth x0, vl7, mul #5 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cnth x0, vl7, mul #5 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: cnth_mul5: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: cnth x0, vl7, mul #5 +; USE_SCALAR_INC-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cnth(i32 7) %out = mul i64 %cnt, 5 ret i64 %out @@ -53,8 +85,14 @@ define i64 @cnth_mul8() { ; CHECK-LABEL: cnth_mul8: -; CHECK: cnth x0, vl5, mul #8 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cnth x0, vl5, mul #8 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: cnth_mul8: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: cnth x0, vl5, mul #8 +; USE_SCALAR_INC-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cnth(i32 5) %out = mul i64 %cnt, 8 ret i64 %out @@ -66,16 +104,28 @@ define i64 @cntw() { ; CHECK-LABEL: cntw: -; CHECK: cntw x0, vl4 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cntw x0, vl4 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: cntw: +; USE_SCALAR_INC: // 
%bb.0: +; USE_SCALAR_INC-NEXT: cntw x0, vl4 +; USE_SCALAR_INC-NEXT: ret %out = call i64 @llvm.aarch64.sve.cntw(i32 4) ret i64 %out } define i64 @cntw_mul11() { ; CHECK-LABEL: cntw_mul11: -; CHECK: cntw x0, vl8, mul #11 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cntw x0, vl8, mul #11 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: cntw_mul11: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: cntw x0, vl8, mul #11 +; USE_SCALAR_INC-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cntw(i32 8) %out = mul i64 %cnt, 11 ret i64 %out @@ -83,8 +133,14 @@ define i64 @cntw_mul2() { ; CHECK-LABEL: cntw_mul2: -; CHECK: cntw x0, vl6, mul #2 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cntw x0, vl6, mul #2 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: cntw_mul2: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: cntw x0, vl6, mul #2 +; USE_SCALAR_INC-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cntw(i32 6) %out = mul i64 %cnt, 2 ret i64 %out @@ -96,16 +152,28 @@ define i64 @cntd() { ; CHECK-LABEL: cntd: -; CHECK: cntd x0, vl5 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cntd x0, vl5 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: cntd: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: cntd x0, vl5 +; USE_SCALAR_INC-NEXT: ret %out = call i64 @llvm.aarch64.sve.cntd(i32 5) ret i64 %out } define i64 @cntd_mul15() { ; CHECK-LABEL: cntd_mul15: -; CHECK: cntd x0, vl16, mul #15 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cntd x0, vl16, mul #15 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: cntd_mul15: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: cntd x0, vl16, mul #15 +; USE_SCALAR_INC-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cntd(i32 9) %out = mul i64 %cnt, 15 ret i64 %out @@ -113,8 +181,14 @@ define i64 @cntd_mul16() { ; CHECK-LABEL: cntd_mul16: -; CHECK: cntd x0, vl32, mul #16 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cntd x0, vl32, mul #16 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: cntd_mul16: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: cntd x0, vl32, mul #16 +; USE_SCALAR_INC-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cntd(i32 10) %out = mul i64 %cnt, 16 ret i64 %out @@ -126,8 +200,14 @@ define i64 @cntp_b8( %pg, %a) { ; CHECK-LABEL: cntp_b8: -; CHECK: cntp x0, p0, p1.b -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cntp x0, p0, p1.b +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: cntp_b8: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: cntp x0, p0, p1.b +; USE_SCALAR_INC-NEXT: ret %out = call i64 @llvm.aarch64.sve.cntp.nxv16i1( %pg, %a) ret i64 %out @@ -135,8 +215,14 @@ define i64 @cntp_b16( %pg, %a) { ; CHECK-LABEL: cntp_b16: -; CHECK: cntp x0, p0, p1.h -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cntp x0, p0, p1.h +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: cntp_b16: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: cntp x0, p0, p1.h +; USE_SCALAR_INC-NEXT: ret %out = call i64 @llvm.aarch64.sve.cntp.nxv8i1( %pg, %a) ret i64 %out @@ -144,8 +230,14 @@ define i64 @cntp_b32( %pg, %a) { ; CHECK-LABEL: cntp_b32: -; CHECK: cntp x0, p0, p1.s -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cntp x0, p0, p1.s +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: cntp_b32: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: cntp x0, p0, p1.s +; USE_SCALAR_INC-NEXT: ret %out = call i64 @llvm.aarch64.sve.cntp.nxv4i1( %pg, %a) ret i64 %out @@ -153,13 +245,311 @@ define i64 @cntp_b64( %pg, %a) { ; CHECK-LABEL: cntp_b64: -; CHECK: cntp x0, p0, p1.d -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cntp x0, p0, p1.d 
+; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: cntp_b64: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: cntp x0, p0, p1.d +; USE_SCALAR_INC-NEXT: ret %out = call i64 @llvm.aarch64.sve.cntp.nxv2i1( %pg, %a) ret i64 %out } +; +; INCB +; + +define i64 @incb(i64 %a) { +; CHECK-LABEL: incb: +; CHECK: // %bb.0: +; CHECK-NEXT: cntb x8, vl5 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: incb: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: incb x0, vl5 +; USE_SCALAR_INC-NEXT: ret + %cnt = call i64 @llvm.aarch64.sve.cntb(i32 5) + %out = add i64 %cnt, %a + ret i64 %out +} + +define i64 @incb_mul(i64 %a) { +; CHECK-LABEL: incb_mul: +; CHECK: // %bb.0: +; CHECK-NEXT: cntb x8, vl4 +; CHECK-NEXT: add x0, x0, x8, lsl #2 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: incb_mul: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: incb x0, vl4, mul #4 +; USE_SCALAR_INC-NEXT: ret + %cnt = call i64 @llvm.aarch64.sve.cntb(i32 4) + %mul = mul i64 %cnt, 4 + %out = add i64 %mul, %a + ret i64 %out +} + +; +; DECB +; + +define i64 @decb(i64 %a) { +; CHECK-LABEL: decb: +; CHECK: // %bb.0: +; CHECK-NEXT: cntb x8, vl6 +; CHECK-NEXT: sub x0, x0, x8 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: decb: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: decb x0, vl6 +; USE_SCALAR_INC-NEXT: ret + %cnt = call i64 @llvm.aarch64.sve.cntb(i32 6) + %out = sub i64 %a, %cnt + ret i64 %out +} + +define i64 @decb_mul(i64 %a) { +; CHECK-LABEL: decb_mul: +; CHECK: // %bb.0: +; CHECK-NEXT: cntb x8, vl7 +; CHECK-NEXT: sub x0, x0, x8, lsl #3 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: decb_mul: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: decb x0, vl7, mul #8 +; USE_SCALAR_INC-NEXT: ret + %cnt = call i64 @llvm.aarch64.sve.cntb(i32 7) + %mul = mul i64 %cnt, 8 + %out = sub i64 %a, %mul + ret i64 %out +} + +; +; INCH +; + +define i64 @inch(i64 %a) { +; CHECK-LABEL: inch: +; CHECK: // %bb.0: +; CHECK-NEXT: cnth x8, vl4 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: inch: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: inch x0, vl4 +; USE_SCALAR_INC-NEXT: ret + %cnt = call i64 @llvm.aarch64.sve.cnth(i32 4) + %out = add i64 %cnt, %a + ret i64 %out +} + +define i64 @inch_mul(i64 %a) { +; CHECK-LABEL: inch_mul: +; CHECK: // %bb.0: +; CHECK-NEXT: cnth x8, vl8, mul #5 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: inch_mul: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: inch x0, vl8, mul #5 +; USE_SCALAR_INC-NEXT: ret + %cnt = call i64 @llvm.aarch64.sve.cnth(i32 8) + %mul = mul i64 %cnt, 5 + %out = add i64 %mul, %a + ret i64 %out +} + +; +; DECH +; + +define i64 @dech(i64 %a) { +; CHECK-LABEL: dech: +; CHECK: // %bb.0: +; CHECK-NEXT: cnth x8, vl1 +; CHECK-NEXT: sub x0, x0, x8 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: dech: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: dech x0, vl1 +; USE_SCALAR_INC-NEXT: ret + %cnt = call i64 @llvm.aarch64.sve.cnth(i32 1) + %out = sub i64 %a, %cnt + ret i64 %out +} + +define i64 @dech_mul(i64 %a) { +; CHECK-LABEL: dech_mul: +; CHECK: // %bb.0: +; CHECK-NEXT: cnth x8, vl16, mul #7 +; CHECK-NEXT: sub x0, x0, x8 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: dech_mul: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: dech x0, vl16, mul #7 +; USE_SCALAR_INC-NEXT: ret + %cnt = call i64 @llvm.aarch64.sve.cnth(i32 9) + %mul = mul i64 %cnt, 7 + %out = sub i64 %a, %mul + ret i64 %out +} + +; +; INCW +; + +define i64 @incw(i64 %a) { +; CHECK-LABEL: incw: +; CHECK: // %bb.0: +; 
CHECK-NEXT: cntw x8, #16 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: incw: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: incw x0, #16 +; USE_SCALAR_INC-NEXT: ret + %cnt = call i64 @llvm.aarch64.sve.cntw(i32 16) + %out = add i64 %cnt, %a + ret i64 %out +} + +define i64 @incw_mul(i64 %a) { +; CHECK-LABEL: incw_mul: +; CHECK: // %bb.0: +; CHECK-NEXT: cntw x8, vl32, mul #12 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: incw_mul: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: incw x0, vl32, mul #12 +; USE_SCALAR_INC-NEXT: ret + %cnt = call i64 @llvm.aarch64.sve.cntw(i32 10) + %mul = mul i64 %cnt, 12 + %out = add i64 %mul, %a + ret i64 %out +} + +; +; DECW +; + +define i64 @decw(i64 %a) { +; CHECK-LABEL: decw: +; CHECK: // %bb.0: +; CHECK-NEXT: cntw x8 +; CHECK-NEXT: sub x0, x0, x8 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: decw: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: decw x0 +; USE_SCALAR_INC-NEXT: ret + %cnt = call i64 @llvm.aarch64.sve.cntw(i32 31) + %out = sub i64 %a, %cnt + ret i64 %out +} + +define i64 @decw_mul(i64 %a) { +; CHECK-LABEL: decw_mul: +; CHECK: // %bb.0: +; CHECK-NEXT: cntw x8, vl128 +; CHECK-NEXT: sub x0, x0, x8, lsl #4 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: decw_mul: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: decw x0, vl128, mul #16 +; USE_SCALAR_INC-NEXT: ret + %cnt = call i64 @llvm.aarch64.sve.cntw(i32 12) + %mul = mul i64 %cnt, 16 + %out = sub i64 %a, %mul + ret i64 %out +} + +define i64 @incd(i64 %a) { +; CHECK-LABEL: incd: +; CHECK: // %bb.0: +; CHECK-NEXT: cntd x8, vl8 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: incd: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: incd x0, vl8 +; USE_SCALAR_INC-NEXT: ret + %cnt = call i64 @llvm.aarch64.sve.cntd(i32 8) + %out = add i64 %cnt, %a + ret i64 %out +} + +define i64 @incd_mul(i64 %a) { +; CHECK-LABEL: incd_mul: +; CHECK: // %bb.0: +; CHECK-NEXT: cntd x8, all, mul #15 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: incd_mul: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: incd x0, all, mul #15 +; USE_SCALAR_INC-NEXT: ret + %cnt = call i64 @llvm.aarch64.sve.cntd(i32 31) + %mul = mul i64 %cnt, 15 + %out = add i64 %mul, %a + ret i64 %out +} + +; +; DECD +; + +define i64 @decd(i64 %a) { +; CHECK-LABEL: decd: +; CHECK: // %bb.0: +; CHECK-NEXT: cntd x8, #16 +; CHECK-NEXT: sub x0, x0, x8 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: decd: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: decd x0, #16 +; USE_SCALAR_INC-NEXT: ret + %cnt = call i64 @llvm.aarch64.sve.cntd(i32 16) + %out = sub i64 %a, %cnt + ret i64 %out +} + +define i64 @decd_mul(i64 %a) { +; CHECK-LABEL: decd_mul: +; CHECK: // %bb.0: +; CHECK-NEXT: cntd x8, vl2, mul #9 +; CHECK-NEXT: sub x0, x0, x8 +; CHECK-NEXT: ret +; +; USE_SCALAR_INC-LABEL: decd_mul: +; USE_SCALAR_INC: // %bb.0: +; USE_SCALAR_INC-NEXT: decd x0, vl2, mul #9 +; USE_SCALAR_INC-NEXT: ret + %cnt = call i64 @llvm.aarch64.sve.cntd(i32 2) + %mul = mul i64 %cnt, 9 + %out = sub i64 %a, %mul + ret i64 %out +} + declare i64 @llvm.aarch64.sve.cntb(i32 %pattern) declare i64 @llvm.aarch64.sve.cnth(i32 %pattern) declare i64 @llvm.aarch64.sve.cntw(i32 %pattern) Index: llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll +++ llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll @@ -23,15 +23,15 @@ ; CHECK-NEXT: 
addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: rdvl x8, #2 +; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxtw x9, w0 -; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: mov x10, sp -; CHECK-NEXT: cmp x9, x8 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: csel x8, x9, x8, lo +; CHECK-NEXT: addvl x8, x8, #2 +; CHECK-NEXT: cmp x9, x8 ; CHECK-NEXT: st1b { z1.b }, p0, [x10, #1, mul vl] +; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: st1b { z0.b }, p0, [sp] ; CHECK-NEXT: ldrb w0, [x10, x8] ; CHECK-NEXT: addvl sp, sp, #2 @@ -48,15 +48,15 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: rdvl x8, #1 +; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxtw x9, w0 -; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: mov x10, sp -; CHECK-NEXT: cmp x9, x8 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: csel x8, x9, x8, lo +; CHECK-NEXT: addvl x8, x8, #1 +; CHECK-NEXT: cmp x9, x8 ; CHECK-NEXT: st1h { z1.h }, p0, [x10, #1, mul vl] +; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: ldrh w0, [x10, x8, lsl #1] ; CHECK-NEXT: addvl sp, sp, #2 @@ -145,15 +145,15 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: rdvl x8, #1 +; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: mov w10, #128 -; CHECK-NEXT: cmp x8, #128 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: csel x8, x8, x10, lo +; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: st1h { z1.h }, p0, [x9, #1, mul vl] +; CHECK-NEXT: cmp x8, #128 ; CHECK-NEXT: st1h { z0.h }, p0, [sp] +; CHECK-NEXT: csel x8, x8, x10, lo ; CHECK-NEXT: ldrh w0, [x9, x8, lsl #1] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -169,19 +169,19 @@ ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: rdvl x9, #1 +; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov w10, #34464 ; CHECK-NEXT: movk w10, #1, lsl #16 -; CHECK-NEXT: sub x9, x9, #1 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: cmp x9, x10 -; CHECK-NEXT: csel x9, x9, x10, lo +; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: st1w { z3.s }, p0, [x8, #3, mul vl] -; CHECK-NEXT: st1w { z2.s }, p0, [x8, #2, mul vl] -; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] +; CHECK-NEXT: addvl x8, x8, #1 +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: st1w { z3.s }, p0, [x9, #3, mul vl] +; CHECK-NEXT: csel x8, x8, x10, lo +; CHECK-NEXT: st1w { z2.s }, p0, [x9, #2, mul vl] +; CHECK-NEXT: st1w { z1.s }, p0, [x9, #1, mul vl] ; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: ldr w0, [x8, x9, lsl #2] +; CHECK-NEXT: ldr w0, [x9, x8, lsl #2] ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll +++ 
llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll @@ -23,14 +23,14 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: rdvl x8, #2 +; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: cmp x1, x8 -; CHECK-NEXT: csel x8, x1, x8, lo +; CHECK-NEXT: addvl x8, x8, #2 ; CHECK-NEXT: st1b { z1.b }, p0, [x9, #1, mul vl] +; CHECK-NEXT: cmp x1, x8 ; CHECK-NEXT: st1b { z0.b }, p0, [sp] +; CHECK-NEXT: csel x8, x1, x8, lo ; CHECK-NEXT: strb w0, [x9, x8] ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x9, #1, mul vl] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] @@ -135,14 +135,14 @@ ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: rdvl x8, #2 +; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov w10, #128 -; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: cmp x8, #128 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: csel x8, x8, x10, lo +; CHECK-NEXT: addvl x8, x8, #2 +; CHECK-NEXT: cmp x8, #128 ; CHECK-NEXT: st1h { z3.h }, p0, [x9, #3, mul vl] +; CHECK-NEXT: csel x8, x8, x10, lo ; CHECK-NEXT: st1h { z2.h }, p0, [x9, #2, mul vl] ; CHECK-NEXT: st1h { z1.h }, p0, [x9, #1, mul vl] ; CHECK-NEXT: st1h { z0.h }, p0, [sp] Index: llvm/test/CodeGen/AArch64/sve-stepvector.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-stepvector.ll +++ llvm/test/CodeGen/AArch64/sve-stepvector.ll @@ -48,10 +48,9 @@ define @stepvector_nxv4i64() { ; CHECK-LABEL: stepvector_nxv4i64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: cntd x8 ; CHECK-NEXT: index z0.d, #0, #1 -; CHECK-NEXT: mov z1.d, x8 -; CHECK-NEXT: add z1.d, z0.d, z1.d +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: incd z1.d ; CHECK-NEXT: ret entry: %0 = call @llvm.experimental.stepvector.nxv4i64() @@ -61,14 +60,13 @@ define @stepvector_nxv16i32() { ; CHECK-LABEL: stepvector_nxv16i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: cntw x9 -; CHECK-NEXT: cnth x8 ; CHECK-NEXT: index z0.s, #0, #1 -; CHECK-NEXT: mov z1.s, w9 -; CHECK-NEXT: mov z3.s, w8 -; CHECK-NEXT: add z1.s, z0.s, z1.s -; CHECK-NEXT: add z2.s, z0.s, z3.s -; CHECK-NEXT: add z3.s, z1.s, z3.s +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: incw z1.s +; CHECK-NEXT: incw z2.s, all, mul #2 +; CHECK-NEXT: mov z3.d, z1.d +; CHECK-NEXT: incw z3.s, all, mul #2 ; CHECK-NEXT: ret entry: %0 = call @llvm.experimental.stepvector.nxv16i32() Index: llvm/test/CodeGen/AArch64/sve-vl-arith.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-vl-arith.ll @@ -0,0 +1,425 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s -check-prefix=NO_SCALAR_INC +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-scalar-inc-vl -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -verify-machineinstrs < %s | FileCheck %s + +define @inch_vec( %a) { +; NO_SCALAR_INC-LABEL: inch_vec: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: inch z0.h +; NO_SCALAR_INC-NEXT: ret +; +; CHECK-LABEL: inch_vec: +; CHECK: // %bb.0: +; CHECK-NEXT: inch z0.h +; CHECK-NEXT: ret + %vscale = call i16 @llvm.vscale.i16() + %mul = mul i16 %vscale, 8 + %vl = 
insertelement undef, i16 %mul, i32 0 + %vl.splat = shufflevector %vl, undef, zeroinitializer + %res = add %a, %vl.splat + ret %res +} + +define @incw_vec( %a) { +; NO_SCALAR_INC-LABEL: incw_vec: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: incw z0.s +; NO_SCALAR_INC-NEXT: ret +; +; CHECK-LABEL: incw_vec: +; CHECK: // %bb.0: +; CHECK-NEXT: incw z0.s +; CHECK-NEXT: ret + %vscale = call i32 @llvm.vscale.i32() + %mul = mul i32 %vscale, 4 + %vl = insertelement undef, i32 %mul, i32 0 + %vl.splat = shufflevector %vl, undef, zeroinitializer + %res = add %a, %vl.splat + ret %res +} + +define @incd_vec( %a) { +; NO_SCALAR_INC-LABEL: incd_vec: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: incd z0.d +; NO_SCALAR_INC-NEXT: ret +; +; CHECK-LABEL: incd_vec: +; CHECK: // %bb.0: +; CHECK-NEXT: incd z0.d +; CHECK-NEXT: ret + %vscale = call i64 @llvm.vscale.i64() + %mul = mul i64 %vscale, 2 + %vl = insertelement undef, i64 %mul, i32 0 + %vl.splat = shufflevector %vl, undef, zeroinitializer + %res = add %a, %vl.splat + ret %res +} + +define @dech_vec( %a) { +; NO_SCALAR_INC-LABEL: dech_vec: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: dech z0.h, all, mul #2 +; NO_SCALAR_INC-NEXT: ret +; +; CHECK-LABEL: dech_vec: +; CHECK: // %bb.0: +; CHECK-NEXT: dech z0.h, all, mul #2 +; CHECK-NEXT: ret + %vscale = call i16 @llvm.vscale.i16() + %mul = mul i16 %vscale, 16 + %vl = insertelement undef, i16 %mul, i32 0 + %vl.splat = shufflevector %vl, undef, zeroinitializer + %res = sub %a, %vl.splat + ret %res +} + +define @decw_vec( %a) { +; NO_SCALAR_INC-LABEL: decw_vec: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: decw z0.s, all, mul #4 +; NO_SCALAR_INC-NEXT: ret +; +; CHECK-LABEL: decw_vec: +; CHECK: // %bb.0: +; CHECK-NEXT: decw z0.s, all, mul #4 +; CHECK-NEXT: ret + %vscale = call i32 @llvm.vscale.i32() + %mul = mul i32 %vscale, 16 + %vl = insertelement undef, i32 %mul, i32 0 + %vl.splat = shufflevector %vl, undef, zeroinitializer + %res = sub %a, %vl.splat + ret %res +} + +define @decd_vec( %a) { +; NO_SCALAR_INC-LABEL: decd_vec: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: decd z0.d, all, mul #8 +; NO_SCALAR_INC-NEXT: ret +; +; CHECK-LABEL: decd_vec: +; CHECK: // %bb.0: +; CHECK-NEXT: decd z0.d, all, mul #8 +; CHECK-NEXT: ret + %vscale = call i64 @llvm.vscale.i64() + %mul = mul i64 %vscale, 16 + %vl = insertelement undef, i64 %mul, i32 0 + %vl.splat = shufflevector %vl, undef, zeroinitializer + %res = sub %a, %vl.splat + ret %res +} + +; NOTE: As there is no need for the predicate pattern we +; fall back to using ADDVL with its larger immediate range. 
+define i64 @incb_scalar_i64(i64 %a) { +; NO_SCALAR_INC-LABEL: incb_scalar_i64: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: addvl x0, x0, #1 +; NO_SCALAR_INC-NEXT: ret +; +; CHECK-LABEL: incb_scalar_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: addvl x0, x0, #1 +; CHECK-NEXT: ret + %vscale = call i64 @llvm.vscale.i64() + %mul = mul i64 %vscale, 16 + %add = add i64 %a, %mul + ret i64 %add +} + +define i64 @inch_scalar_i64(i64 %a) { +; NO_SCALAR_INC-LABEL: inch_scalar_i64: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: cnth x8 +; NO_SCALAR_INC-NEXT: add x0, x0, x8 +; NO_SCALAR_INC-NEXT: ret +; +; CHECK-LABEL: inch_scalar_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: inch x0 +; CHECK-NEXT: ret + %vscale = call i64 @llvm.vscale.i64() + %mul = mul i64 %vscale, 8 + %add = add i64 %a, %mul + ret i64 %add +} + +define i64 @incw_scalar_i64(i64 %a) { +; NO_SCALAR_INC-LABEL: incw_scalar_i64: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: cntw x8 +; NO_SCALAR_INC-NEXT: add x0, x0, x8 +; NO_SCALAR_INC-NEXT: ret +; +; CHECK-LABEL: incw_scalar_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: incw x0 +; CHECK-NEXT: ret + %vscale = call i64 @llvm.vscale.i64() + %mul = mul i64 %vscale, 4 + %add = add i64 %a, %mul + ret i64 %add +} + +define i64 @incd_scalar_i64(i64 %a) { +; NO_SCALAR_INC-LABEL: incd_scalar_i64: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: cntd x8 +; NO_SCALAR_INC-NEXT: add x0, x0, x8 +; NO_SCALAR_INC-NEXT: ret +; +; CHECK-LABEL: incd_scalar_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: incd x0 +; CHECK-NEXT: ret + %vscale = call i64 @llvm.vscale.i64() + %mul = mul i64 %vscale, 2 + %add = add i64 %a, %mul + ret i64 %add +} + +; NOTE: As there is no need for the predicate pattern we +; fall back to using ADDVL with its larger immediate range. +define i64 @decb_scalar_i64(i64 %a) { +; NO_SCALAR_INC-LABEL: decb_scalar_i64: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: addvl x0, x0, #-2 +; NO_SCALAR_INC-NEXT: ret +; +; CHECK-LABEL: decb_scalar_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: addvl x0, x0, #-2 +; CHECK-NEXT: ret + %vscale = call i64 @llvm.vscale.i64() + %mul = mul i64 %vscale, 32 + %sub = sub i64 %a, %mul + ret i64 %sub +} + +define i64 @dech_scalar_i64(i64 %a) { +; NO_SCALAR_INC-LABEL: dech_scalar_i64: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: cnth x8, all, mul #3 +; NO_SCALAR_INC-NEXT: neg x8, x8 +; NO_SCALAR_INC-NEXT: add x0, x0, x8 +; NO_SCALAR_INC-NEXT: ret +; +; CHECK-LABEL: dech_scalar_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: dech x0, all, mul #3 +; CHECK-NEXT: ret + %vscale = call i64 @llvm.vscale.i64() + %mul = mul i64 %vscale, 24 + %sub = sub i64 %a, %mul + ret i64 %sub +} + +define i64 @decw_scalar_i64(i64 %a) { +; NO_SCALAR_INC-LABEL: decw_scalar_i64: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: cntw x8, all, mul #3 +; NO_SCALAR_INC-NEXT: neg x8, x8 +; NO_SCALAR_INC-NEXT: add x0, x0, x8 +; NO_SCALAR_INC-NEXT: ret +; +; CHECK-LABEL: decw_scalar_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: decw x0, all, mul #3 +; CHECK-NEXT: ret + %vscale = call i64 @llvm.vscale.i64() + %mul = mul i64 %vscale, 12 + %sub = sub i64 %a, %mul + ret i64 %sub +} + +define i64 @decd_scalar_i64(i64 %a) { +; NO_SCALAR_INC-LABEL: decd_scalar_i64: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: cntd x8, all, mul #3 +; NO_SCALAR_INC-NEXT: neg x8, x8 +; NO_SCALAR_INC-NEXT: add x0, x0, x8 +; NO_SCALAR_INC-NEXT: ret +; +; CHECK-LABEL: decd_scalar_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: decd x0, all, mul #3 +; CHECK-NEXT: ret + %vscale = call i64 @llvm.vscale.i64() + %mul = mul i64 %vscale, 6 + %sub 
= sub i64 %a, %mul + ret i64 %sub +} + +; NOTE: As there is no need for the predicate pattern we +; fall back to using ADDVL with its larger immediate range. +define i32 @incb_scalar_i32(i32 %a) { +; NO_SCALAR_INC-LABEL: incb_scalar_i32: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: // kill: def $w0 killed $w0 def $x0 +; NO_SCALAR_INC-NEXT: addvl x0, x0, #3 +; NO_SCALAR_INC-NEXT: // kill: def $w0 killed $w0 killed $x0 +; NO_SCALAR_INC-NEXT: ret + +; CHECK-LABEL: incb_scalar_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: addvl x0, x0, #3 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret + %vscale = call i64 @llvm.vscale.i64() + %mul = mul i64 %vscale, 48 + %vl = trunc i64 %mul to i32 + %add = add i32 %a, %vl + ret i32 %add +} + +define i32 @inch_scalar_i32(i32 %a) { +; NO_SCALAR_INC-LABEL: inch_scalar_i32: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: cnth x8, all, mul #7 +; NO_SCALAR_INC-NEXT: add w0, w0, w8 +; NO_SCALAR_INC-NEXT: ret + +; CHECK-LABEL: inch_scalar_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: inch x0, all, mul #7 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret + %vscale = call i64 @llvm.vscale.i64() + %mul = mul i64 %vscale, 56 + %vl = trunc i64 %mul to i32 + %add = add i32 %a, %vl + ret i32 %add +} + +define i32 @incw_scalar_i32(i32 %a) { +; NO_SCALAR_INC-LABEL: incw_scalar_i32: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: cntw x8, all, mul #7 +; NO_SCALAR_INC-NEXT: add w0, w0, w8 +; NO_SCALAR_INC-NEXT: ret + +; CHECK-LABEL: incw_scalar_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: incw x0, all, mul #7 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret + %vscale = call i64 @llvm.vscale.i64() + %mul = mul i64 %vscale, 28 + %vl = trunc i64 %mul to i32 + %add = add i32 %a, %vl + ret i32 %add +} + +define i32 @incd_scalar_i32(i32 %a) { +; NO_SCALAR_INC-LABEL: incd_scalar_i32: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: cntd x8, all, mul #7 +; NO_SCALAR_INC-NEXT: add w0, w0, w8 +; NO_SCALAR_INC-NEXT: ret + +; CHECK-LABEL: incd_scalar_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: incd x0, all, mul #7 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret + %vscale = call i64 @llvm.vscale.i64() + %mul = mul i64 %vscale, 14 + %vl = trunc i64 %mul to i32 + %add = add i32 %a, %vl + ret i32 %add +} + +; NOTE: As there is no need for the predicate pattern we +; fall back to using ADDVL with its larger immediate range. 
+define i32 @decb_scalar_i32(i32 %a) { +; NO_SCALAR_INC-LABEL: decb_scalar_i32: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: // kill: def $w0 killed $w0 def $x0 +; NO_SCALAR_INC-NEXT: addvl x0, x0, #-4 +; NO_SCALAR_INC-NEXT: // kill: def $w0 killed $w0 killed $x0 +; NO_SCALAR_INC-NEXT: ret + +; CHECK-LABEL: decb_scalar_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: addvl x0, x0, #-4 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret + %vscale = call i64 @llvm.vscale.i64() + %mul = mul i64 %vscale, 64 + %vl = trunc i64 %mul to i32 + %sub = sub i32 %a, %vl + ret i32 %sub +} + +define i32 @dech_scalar_i32(i32 %a) { +; NO_SCALAR_INC-LABEL: dech_scalar_i32: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: cnth x8 +; NO_SCALAR_INC-NEXT: neg x8, x8 +; NO_SCALAR_INC-NEXT: add w0, w0, w8 +; NO_SCALAR_INC-NEXT: ret + +; CHECK-LABEL: dech_scalar_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: dech x0 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret + %vscale = call i64 @llvm.vscale.i64() + %mul = mul i64 %vscale, 8 + %vl = trunc i64 %mul to i32 + %sub = sub i32 %a, %vl + ret i32 %sub +} + +define i32 @decw_scalar_i32(i32 %a) { +; NO_SCALAR_INC-LABEL: decw_scalar_i32: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: cntw x8 +; NO_SCALAR_INC-NEXT: neg x8, x8 +; NO_SCALAR_INC-NEXT: add w0, w0, w8 +; NO_SCALAR_INC-NEXT: ret + +; CHECK-LABEL: decw_scalar_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: decw x0 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret + %vscale = call i64 @llvm.vscale.i64() + %mul = mul i64 %vscale, 4 + %vl = trunc i64 %mul to i32 + %sub = sub i32 %a, %vl + ret i32 %sub +} + +define i32 @decd_scalar_i32(i32 %a) { +; NO_SCALAR_INC-LABEL: decd_scalar_i32: +; NO_SCALAR_INC: // %bb.0: +; NO_SCALAR_INC-NEXT: cntd x8 +; NO_SCALAR_INC-NEXT: neg x8, x8 +; NO_SCALAR_INC-NEXT: add w0, w0, w8 +; NO_SCALAR_INC-NEXT: ret +; +; CHECK-LABEL: decd_scalar_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: decd x0 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret + %vscale = call i64 @llvm.vscale.i64() + %mul = mul i64 %vscale, 2 + %vl = trunc i64 %mul to i32 + %sub = sub i32 %a, %vl + ret i32 %sub +} + +declare i16 @llvm.vscale.i16() +declare i32 @llvm.vscale.i32() +declare i64 @llvm.vscale.i64()
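
For reference, here is a minimal worked example of the scalar fold the new UseScalarIncVL patterns provide, distilled from the inch_scalar_i64 and incb_scalar_i64 tests above (the function name below is illustrative only, not part of the patch):

; IR sketch: add one CNTH's worth of elements (vscale x 8) to a scalar.
define i64 @example_add_vscale_x8(i64 %a) {
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 8          ; vscale x 8 == number of h elements
  %add = add i64 %a, %mul
  ret i64 %add                       ; +sve:                    cnth x8 ; add x0, x0, x8
                                     ; +sve,+use-scalar-inc-vl: inch x0
}
declare i64 @llvm.vscale.i64()

With plain +sve this keeps the cnth/add pair; with +use-scalar-inc-vl (which +sve2 now implies via the AArch64.td change above) the pair folds into a single inch. Multiples of a whole vector length in bytes (vscale x 16) continue to use ADDVL in both modes, as noted in the tests, since ADDVL's signed immediate range covers such offsets without needing a predicate pattern.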