diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -259,6 +259,10 @@
 def SDT_IndexVector : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisInt<2>]>;
 def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>;
+def index_vector_oneuse : PatFrag<(ops node:$base, node:$idx),
+                                  (index_vector node:$base, node:$idx), [{
+  return N->hasOneUse();
+}]>;
 
 def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>;
 
@@ -1362,10 +1366,10 @@
   defm INCP_ZP : sve_int_count_v<0b10000, "incp">;
   defm DECP_ZP : sve_int_count_v<0b10100, "decp">;
 
-  defm INDEX_RR : sve_int_index_rr<"index", index_vector>;
-  defm INDEX_IR : sve_int_index_ir<"index", index_vector>;
-  defm INDEX_RI : sve_int_index_ri<"index", index_vector>;
-  defm INDEX_II : sve_int_index_ii<"index", index_vector>;
+  defm INDEX_RR : sve_int_index_rr<"index", index_vector, index_vector_oneuse>;
+  defm INDEX_IR : sve_int_index_ir<"index", index_vector, index_vector_oneuse>;
+  defm INDEX_RI : sve_int_index_ri<"index", index_vector, index_vector_oneuse>;
+  defm INDEX_II : sve_int_index_ii<"index", index_vector, index_vector_oneuse>;
 
   // Unpredicated shifts
   defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_p>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -4777,7 +4777,7 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sve_int_index_ii<string asm, SDPatternOperator op> {
+multiclass sve_int_index_ii<string asm, SDPatternOperator op, SDPatternOperator oneuseop> {
   def _B : sve_int_index_ii<0b00, asm, ZPR8, simm5_8b>;
   def _H : sve_int_index_ii<0b01, asm, ZPR16, simm5_16b>;
   def _S : sve_int_index_ii<0b10, asm, ZPR32, simm5_32b>;
@@ -4791,6 +4791,16 @@
             (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, simm5_32b:$imm5b)>;
   def : Pat<(nxv2i64 (op simm5_64b:$imm5, simm5_64b:$imm5b)),
             (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, simm5_64b:$imm5b)>;
+
+  // add(index_vector(zero, step), dup(X)) -> index_vector(X, step).
+  def : Pat<(add (nxv16i8 (oneuseop (i32 0), simm5_8b:$imm5b)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
+            (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, simm5_8b:$imm5b)>;
+  def : Pat<(add (nxv8i16 (oneuseop (i32 0), simm5_16b:$imm5b)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
+            (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, simm5_16b:$imm5b)>;
+  def : Pat<(add (nxv4i32 (oneuseop (i32 0), simm5_32b:$imm5b)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
+            (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, simm5_32b:$imm5b)>;
+  def : Pat<(add (nxv2i64 (oneuseop (i64 0), simm5_64b:$imm5b)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
+            (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, simm5_64b:$imm5b)>;
 }
 
 class sve_int_index_ir<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -4810,7 +4820,7 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sve_int_index_ir<string asm, SDPatternOperator op> {
+multiclass sve_int_index_ir<string asm, SDPatternOperator op, SDPatternOperator oneuseop> {
   def _B : sve_int_index_ir<0b00, asm, ZPR8, GPR32, simm5_8b>;
   def _H : sve_int_index_ir<0b01, asm, ZPR16, GPR32, simm5_16b>;
   def _S : sve_int_index_ir<0b10, asm, ZPR32, GPR32, simm5_32b>;
@@ -4824,6 +4834,17 @@
             (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, GPR32:$Rm)>;
   def : Pat<(nxv2i64 (op simm5_64b:$imm5, GPR64:$Rm)),
             (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, GPR64:$Rm)>;
+
+  // add(index_vector(zero, step), dup(X)) -> index_vector(X, step).
+  def : Pat<(add (nxv16i8 (oneuseop (i32 0), GPR32:$Rm)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
+            (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, GPR32:$Rm)>;
+  def : Pat<(add (nxv8i16 (oneuseop (i32 0), GPR32:$Rm)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
+            (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, GPR32:$Rm)>;
+  def : Pat<(add (nxv4i32 (oneuseop (i32 0), GPR32:$Rm)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
+            (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, GPR32:$Rm)>;
+  def : Pat<(add (nxv2i64 (oneuseop (i64 0), GPR64:$Rm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
+            (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, GPR64:$Rm)>;
+
 }
 
 class sve_int_index_ri<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -4843,7 +4864,7 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sve_int_index_ri<string asm, SDPatternOperator op> {
+multiclass sve_int_index_ri<string asm, SDPatternOperator op, SDPatternOperator oneuseop> {
   def _B : sve_int_index_ri<0b00, asm, ZPR8, GPR32, simm5_8b>;
   def _H : sve_int_index_ri<0b01, asm, ZPR16, GPR32, simm5_16b>;
   def _S : sve_int_index_ri<0b10, asm, ZPR32, GPR32, simm5_32b>;
@@ -4857,6 +4878,16 @@
             (!cast<Instruction>(NAME # "_S") GPR32:$Rm, simm5_32b:$imm5)>;
   def : Pat<(nxv2i64 (op GPR64:$Rm, simm5_64b:$imm5)),
             (!cast<Instruction>(NAME # "_D") GPR64:$Rm, simm5_64b:$imm5)>;
+
+  // add(index_vector(zero, step), dup(X)) -> index_vector(X, step).
+  def : Pat<(add (nxv16i8 (oneuseop (i32 0), simm5_8b:$imm5)), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))),
+            (!cast<Instruction>(NAME # "_B") GPR32:$Rm, simm5_8b:$imm5)>;
+  def : Pat<(add (nxv8i16 (oneuseop (i32 0), simm5_16b:$imm5)), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),
+            (!cast<Instruction>(NAME # "_H") GPR32:$Rm, simm5_16b:$imm5)>;
+  def : Pat<(add (nxv4i32 (oneuseop (i32 0), simm5_32b:$imm5)), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),
+            (!cast<Instruction>(NAME # "_S") GPR32:$Rm, simm5_32b:$imm5)>;
+  def : Pat<(add (nxv2i64 (oneuseop (i64 0), simm5_64b:$imm5)), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),
+            (!cast<Instruction>(NAME # "_D") GPR64:$Rm, simm5_64b:$imm5)>;
 }
 
 class sve_int_index_rr<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -4876,7 +4907,7 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sve_int_index_rr<string asm, SDPatternOperator op> {
+multiclass sve_int_index_rr<string asm, SDPatternOperator op, SDPatternOperator oneuseop> {
   def _B : sve_int_index_rr<0b00, asm, ZPR8, GPR32>;
   def _H : sve_int_index_rr<0b01, asm, ZPR16, GPR32>;
   def _S : sve_int_index_rr<0b10, asm, ZPR32, GPR32>;
@@ -4886,6 +4917,16 @@
   def : SVE_2_Op_Pat<nxv8i16, op, i32, i32, !cast<Instruction>(NAME # _H)>;
   def : SVE_2_Op_Pat<nxv4i32, op, i32, i32, !cast<Instruction>(NAME # _S)>;
   def : SVE_2_Op_Pat<nxv2i64, op, i64, i64, !cast<Instruction>(NAME # _D)>;
+
+  // add(index_vector(zero, step), dup(X)) -> index_vector(X, step).
+  def : Pat<(add (nxv16i8 (oneuseop (i32 0), GPR32:$Rm)), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))),
+            (!cast<Instruction>(NAME # "_B") GPR32:$Rn, GPR32:$Rm)>;
+  def : Pat<(add (nxv8i16 (oneuseop (i32 0), GPR32:$Rm)), (nxv8i16 (AArch64dup(i32 GPR32:$Rn)))),
+            (!cast<Instruction>(NAME # "_H") GPR32:$Rn, GPR32:$Rm)>;
+  def : Pat<(add (nxv4i32 (oneuseop (i32 0), GPR32:$Rm)), (nxv4i32 (AArch64dup(i32 GPR32:$Rn)))),
+            (!cast<Instruction>(NAME # "_S") GPR32:$Rn, GPR32:$Rm)>;
+  def : Pat<(add (nxv2i64 (oneuseop (i64 0), GPR64:$Rm)), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))),
+            (!cast<Instruction>(NAME # "_D") GPR64:$Rn, GPR64:$Rm)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-stepvector.ll b/llvm/test/CodeGen/AArch64/sve-stepvector.ll
--- a/llvm/test/CodeGen/AArch64/sve-stepvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-stepvector.ll
@@ -131,6 +131,107 @@
   ret <vscale x 8 x i8> %3
 }
 
+define <vscale x 8 x i8> @add_stepvector_nxv8i8_2() {
+; CHECK-LABEL: add_stepvector_nxv8i8_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.h, #2, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 8 x i8> poison, i8 2, i32 0
+  %1 = shufflevector <vscale x 8 x i8> %0, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %2 = call <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
+  %3 = add <vscale x 8 x i8> %2, %1
+  ret <vscale x 8 x i8> %3
+}
+
+define <vscale x 8 x i8> @add_stepvector_nxv8i8_2_commutative() {
+; CHECK-LABEL: add_stepvector_nxv8i8_2_commutative:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.h, #2, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 8 x i8> poison, i8 2, i32 0
+  %1 = shufflevector <vscale x 8 x i8> %0, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %2 = call <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
+  %3 = add <vscale x 8 x i8> %1, %2
+  ret <vscale x 8 x i8> %3
+}
+
+define <vscale x 8 x i16> @add_stepvector_nxv8i16_1(i16 %data) {
+; CHECK-LABEL: add_stepvector_nxv8i16_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.h, w0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 8 x i16> poison, i16 %data, i32 0
+  %1 = shufflevector <vscale x 8 x i16> %0, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %2 = call <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+  %3 = add <vscale x 8 x i16> %2, %1
+  ret <vscale x 8 x i16> %3
+}
+
+define <vscale x 4 x i32> @add_stepvector_nxv4i32_1(i32 %data) {
+; CHECK-LABEL: add_stepvector_nxv4i32_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.s, w0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 4 x i32> poison, i32 %data, i32 0
+  %1 = shufflevector <vscale x 4 x i32> %0, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %2 = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+  %3 = add <vscale x 4 x i32> %2, %1
+  ret <vscale x 4 x i32> %3
+}
+
+define <vscale x 4 x i32> @multiple_use_stepvector_nxv4i32_1() {
+; CHECK-LABEL: multiple_use_stepvector_nxv4i32_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    add z1.s, z1.s, #2 // =0x2
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 4 x i32> poison, i32 2, i32 0
+  %1 = shufflevector <vscale x 4 x i32> %0, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %2 = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+  %3 = add <vscale x 4 x i32> %2, %1
+  %4 = mul <vscale x 4 x i32> %2, %3
+  ret <vscale x 4 x i32> %4
+}
+
+define <vscale x 2 x i64> @add_stepvector_nxv2i64_1(i64 %data) {
+; CHECK-LABEL: add_stepvector_nxv2i64_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.d, x0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 2 x i64> poison, i64 %data, i32 0
+  %1 = shufflevector <vscale x 2 x i64> %0, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %2 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+  %3 = add <vscale x 2 x i64> %1, %2
+  ret <vscale x 2 x i64> %3
+}
+
+define <vscale x 2 x i64> @multiple_use_stepvector_nxv2i64_1(i64 %data) {
+; CHECK-LABEL: multiple_use_stepvector_nxv2i64_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, x0
+; CHECK-NEXT:    index z1.d, #0, #1
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 2 x i64> poison, i64 %data, i32 0
+  %1 = shufflevector <vscale x 2 x i64> %0, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %2 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+  %3 = add <vscale x 2 x i64> %1, %2
+  %4 = mul <vscale x 2 x i64> %3, %2
+  ret <vscale x 2 x i64> %4
+}
+
 define <vscale x 8 x i8> @mul_stepvector_nxv8i8() {
 ; CHECK-LABEL: mul_stepvector_nxv8i8:
 ; CHECK:       // %bb.0: // %entry
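
Illustration only, not part of the patch: a minimal before/after sketch of what the one-use patterns buy, drawn entirely from the tests above. For a single-use step vector added to a splat, as in add_stepvector_nxv2i64_1:

  %0 = insertelement <vscale x 2 x i64> poison, i64 %data, i32 0
  %1 = shufflevector <vscale x 2 x i64> %0, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
  %2 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
  %3 = add <vscale x 2 x i64> %1, %2

instruction selection now emits the single instruction "index z0.d, x0, #1" rather than the three-instruction sequence "mov z0.d, x0; index z1.d, #0, #1; add z0.d, z0.d, z1.d" still visible in multiple_use_stepvector_nxv2i64_1. The hasOneUse() guard in index_vector_oneuse deliberately blocks the fold when the INDEX result has other users, since the base INDEX would have to be materialized anyway and folding would only add a second one.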