diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -4852,29 +4852,29 @@ let Inst{4-0} = Zd; } -multiclass sve_int_index_ii<string asm, SDPatternOperator op, SDPatternOperator oneuseop> { +multiclass sve_int_index_ii<string asm> { def _B : sve_int_index_ii<0b00, asm, ZPR8, simm5_8b>; def _H : sve_int_index_ii<0b01, asm, ZPR16, simm5_16b>; def _S : sve_int_index_ii<0b10, asm, ZPR32, simm5_32b>; def _D : sve_int_index_ii<0b11, asm, ZPR64, simm5_64b>; - def : Pat<(nxv16i8 (op simm5_8b:$imm5b)), + def : Pat<(nxv16i8 (step_vector simm5_8b:$imm5b)), (!cast<Instruction>(NAME # "_B") (i32 0), simm5_8b:$imm5b)>; - def : Pat<(nxv8i16 (op simm5_16b:$imm5b)), + def : Pat<(nxv8i16 (step_vector simm5_16b:$imm5b)), (!cast<Instruction>(NAME # "_H") (i32 0), simm5_16b:$imm5b)>; - def : Pat<(nxv4i32 (op simm5_32b:$imm5b)), + def : Pat<(nxv4i32 (step_vector simm5_32b:$imm5b)), (!cast<Instruction>(NAME # "_S") (i32 0), simm5_32b:$imm5b)>; - def : Pat<(nxv2i64 (op simm5_64b:$imm5b)), + def : Pat<(nxv2i64 (step_vector simm5_64b:$imm5b)), (!cast<Instruction>(NAME # "_D") (i64 0), simm5_64b:$imm5b)>; // add(step_vector(step), dup(X)) -> index(X, step). 
- def : Pat<(add (nxv16i8 (oneuseop simm5_8b:$imm5b)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), + def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b:$imm5b)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, simm5_8b:$imm5b)>; - def : Pat<(add (nxv8i16 (oneuseop simm5_16b:$imm5b)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), + def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b:$imm5b)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, simm5_16b:$imm5b)>; - def : Pat<(add (nxv4i32 (oneuseop simm5_32b:$imm5b)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), + def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b:$imm5b)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, simm5_32b:$imm5b)>; - def : Pat<(add (nxv2i64 (oneuseop simm5_64b:$imm5b)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), + def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b:$imm5b)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, simm5_64b:$imm5b)>; } @@ -4895,49 +4895,53 @@ let Inst{4-0} = Zd; } -multiclass sve_int_index_ir<string asm, SDPatternOperator op, SDPatternOperator oneuseop, SDPatternOperator mulop, SDPatternOperator muloneuseop> { +multiclass sve_int_index_ir<string asm, SDPatternOperator mulop, SDPatternOperator muloneuseop> { def _B : sve_int_index_ir<0b00, asm, ZPR8, GPR32, simm5_8b>; def _H : sve_int_index_ir<0b01, asm, ZPR16, GPR32, simm5_16b>; def _S : sve_int_index_ir<0b10, asm, ZPR32, GPR32, simm5_32b>; def _D : sve_int_index_ir<0b11, asm, ZPR64, GPR64, simm5_64b>; - def : Pat<(nxv16i8 (op GPR32:$Rm)), - (!cast<Instruction>(NAME # "_B") (i32 0), GPR32:$Rm)>; - def : Pat<(nxv8i16 (op GPR32:$Rm)), - (!cast<Instruction>(NAME # "_H") (i32 0), GPR32:$Rm)>; - def : Pat<(nxv4i32 (op GPR32:$Rm)), - (!cast<Instruction>(NAME # "_S") (i32 0), GPR32:$Rm)>; - def : Pat<(nxv2i64 (op GPR64:$Rm)), - (!cast<Instruction>(NAME # "_D") (i64 0), GPR64:$Rm)>; + def : Pat<(nxv16i8 (step_vector (i32 imm:$imm))), + (!cast<Instruction>(NAME # "_B") (i32 0), (!cast<Instruction>("MOVi32imm") imm:$imm))>; + def : Pat<(nxv8i16 (step_vector (i32 imm:$imm))), + (!cast<Instruction>(NAME # "_H") (i32 0), (!cast<Instruction>("MOVi32imm") imm:$imm))>; + def : Pat<(nxv4i32 (step_vector 
(i32 imm:$imm))), + (!cast<Instruction>(NAME # "_S") (i32 0), (!cast<Instruction>("MOVi32imm") imm:$imm))>; + def : Pat<(nxv2i64 (step_vector (i64 imm:$imm))), + (!cast<Instruction>(NAME # "_D") (i64 0), (!cast<Instruction>("MOVi64imm") imm:$imm))>; + def : Pat<(nxv2i64 (step_vector (i64 !cast<ImmLeaf>("i64imm_32bit"):$imm))), + (!cast<Instruction>(NAME # "_D") (i64 0), (SUBREG_TO_REG (i64 0), (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") imm:$imm)), sub_32))>; // add(step_vector(step), dup(X)) -> index(X, step). - def : Pat<(add (nxv16i8 (oneuseop GPR32:$Rm)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), - (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, GPR32:$Rm)>; - def : Pat<(add (nxv8i16 (oneuseop GPR32:$Rm)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), - (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, GPR32:$Rm)>; - def : Pat<(add (nxv4i32 (oneuseop GPR32:$Rm)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), - (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, GPR32:$Rm)>; - def : Pat<(add (nxv2i64 (oneuseop GPR64:$Rm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), - (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, GPR64:$Rm)>; + def : Pat<(add (nxv16i8 (step_vector_oneuse (i32 imm:$imm))), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), + (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, (!cast<Instruction>("MOVi32imm") imm:$imm))>; + def : Pat<(add (nxv8i16 (step_vector_oneuse (i32 imm:$imm))), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), + (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, (!cast<Instruction>("MOVi32imm") imm:$imm))>; + def : Pat<(add (nxv4i32 (step_vector_oneuse (i32 imm:$imm))), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), + (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, (!cast<Instruction>("MOVi32imm") imm:$imm))>; + def : Pat<(add (nxv2i64 (step_vector_oneuse (i64 imm:$imm))), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), + (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, (!cast<Instruction>("MOVi64imm") imm:$imm))>; + def : Pat<(add (nxv2i64 (step_vector_oneuse (i64 !cast<ImmLeaf>("i64imm_32bit"):$imm))), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), + (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, (SUBREG_TO_REG (i64 0), (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") imm:$imm)), sub_32))>; // mul(step_vector(1), dup(Y)) 
-> index(0, Y). - def : Pat<(mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (oneuseop (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), + def : Pat<(mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (!cast<Instruction>(NAME # "_B") (i32 0), GPR32:$Rm)>; - def : Pat<(mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (oneuseop (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), + def : Pat<(mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), (!cast<Instruction>(NAME # "_H") (i32 0), GPR32:$Rm)>; - def : Pat<(mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (oneuseop (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), + def : Pat<(mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), (!cast<Instruction>(NAME # "_S") (i32 0), GPR32:$Rm)>; - def : Pat<(mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (oneuseop (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), + def : Pat<(mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), (!cast<Instruction>(NAME # "_D") (i64 0), GPR64:$Rm)>; - // add(mul(step_vector(1), dup(Y), dup(X)) -> index(X, Y). + // add(mul(step_vector(1), dup(Y)), dup(X)) -> index(X, Y). 
+ def : Pat<(add (muloneuseop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, GPR32:$Rm)>; - def : Pat<(add (muloneuseop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (oneuseop (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), + def : Pat<(add (muloneuseop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, GPR32:$Rm)>; - def : Pat<(add (muloneuseop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (oneuseop (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), + def : Pat<(add (muloneuseop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, GPR32:$Rm)>; - def : Pat<(add (muloneuseop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (oneuseop (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), + def : Pat<(add (muloneuseop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, GPR64:$Rm)>; } @@ -4958,20 +4962,20 @@ let Inst{4-0} = Zd; } -multiclass sve_int_index_ri<string asm, SDPatternOperator oneuseop> { +multiclass sve_int_index_ri<string asm> { def _B : sve_int_index_ri<0b00, asm, ZPR8, GPR32, simm5_8b>; def _H : sve_int_index_ri<0b01, asm, ZPR16, GPR32, simm5_16b>; def _S : sve_int_index_ri<0b10, asm, ZPR32, GPR32, simm5_32b>; def _D : sve_int_index_ri<0b11, asm, ZPR64, GPR64, simm5_64b>; // add(step_vector(step), dup(X)) -> index(X, step). 
- def : Pat<(add (nxv16i8 (oneuseop simm5_8b:$imm5)), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), + def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b:$imm5)), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (!cast<Instruction>(NAME # "_B") GPR32:$Rm, simm5_8b:$imm5)>; - def : Pat<(add (nxv8i16 (oneuseop simm5_16b:$imm5)), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), + def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b:$imm5)), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), (!cast<Instruction>(NAME # "_H") GPR32:$Rm, simm5_16b:$imm5)>; - def : Pat<(add (nxv4i32 (oneuseop simm5_32b:$imm5)), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), + def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b:$imm5)), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), (!cast<Instruction>(NAME # "_S") GPR32:$Rm, simm5_32b:$imm5)>; - def : Pat<(add (nxv2i64 (oneuseop simm5_64b:$imm5)), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), + def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b:$imm5)), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), (!cast<Instruction>(NAME # "_D") GPR64:$Rm, simm5_64b:$imm5)>; } @@ -4992,30 +4996,32 @@ let Inst{4-0} = Zd; } -multiclass sve_int_index_rr<string asm, SDPatternOperator oneuseop, SDPatternOperator mulop> { +multiclass sve_int_index_rr<string asm, SDPatternOperator mulop> { def _B : sve_int_index_rr<0b00, asm, ZPR8, GPR32>; def _H : sve_int_index_rr<0b01, asm, ZPR16, GPR32>; def _S : sve_int_index_rr<0b10, asm, ZPR32, GPR32>; def _D : sve_int_index_rr<0b11, asm, ZPR64, GPR64>; // add(step_vector(step), dup(X)) -> index(X, step). - def : Pat<(add (nxv16i8 (oneuseop GPR32:$Rm)), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))), - (!cast<Instruction>(NAME # "_B") GPR32:$Rn, GPR32:$Rm)>; - def : Pat<(add (nxv8i16 (oneuseop GPR32:$Rm)), (nxv8i16 (AArch64dup(i32 GPR32:$Rn)))), - (!cast<Instruction>(NAME # "_H") GPR32:$Rn, GPR32:$Rm)>; - def : Pat<(add (nxv4i32 (oneuseop GPR32:$Rm)), (nxv4i32 (AArch64dup(i32 GPR32:$Rn)))), - (!cast<Instruction>(NAME # "_S") GPR32:$Rn, GPR32:$Rm)>; - def : Pat<(add (nxv2i64 (oneuseop GPR64:$Rm)), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))), - (!cast<Instruction>(NAME # "_D") GPR64:$Rn, GPR64:$Rm)>; - - // add(mul(step_vector(1), dup(Y), dup(X)) -> index(X, Y). 
- def : Pat<(add (mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (oneuseop (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))), + def : Pat<(add (nxv16i8 (step_vector_oneuse (i32 imm:$imm))), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))), + (!cast<Instruction>(NAME # "_B") GPR32:$Rn, (!cast<Instruction>("MOVi32imm") imm:$imm))>; + def : Pat<(add (nxv8i16 (step_vector_oneuse (i32 imm:$imm))), (nxv8i16 (AArch64dup(i32 GPR32:$Rn)))), + (!cast<Instruction>(NAME # "_H") GPR32:$Rn, (!cast<Instruction>("MOVi32imm") imm:$imm))>; + def : Pat<(add (nxv4i32 (step_vector_oneuse (i32 imm:$imm))), (nxv4i32 (AArch64dup(i32 GPR32:$Rn)))), + (!cast<Instruction>(NAME # "_S") GPR32:$Rn, (!cast<Instruction>("MOVi32imm") imm:$imm))>; + def : Pat<(add (nxv2i64 (step_vector_oneuse (i64 imm:$imm))), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))), + (!cast<Instruction>(NAME # "_D") GPR64:$Rn, (!cast<Instruction>("MOVi64imm") imm:$imm))>; + def : Pat<(add (nxv2i64 (step_vector_oneuse (i64 !cast<ImmLeaf>("i64imm_32bit"):$imm))), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))), + (!cast<Instruction>(NAME # "_D") GPR64:$Rn, (SUBREG_TO_REG (i64 0), (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") imm:$imm)), sub_32))>; + + // add(mul(step_vector(1), dup(Y)), dup(X)) -> index(X, Y). 
+ def : Pat<(add (mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))), (!cast<Instruction>(NAME # "_B") GPR32:$Rn, GPR32:$Rm)>; - def : Pat<(add (mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (oneuseop (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),(nxv8i16 (AArch64dup(i32 GPR32:$Rn)))), + def : Pat<(add (mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),(nxv8i16 (AArch64dup(i32 GPR32:$Rn)))), (!cast<Instruction>(NAME # "_H") GPR32:$Rn, GPR32:$Rm)>; - def : Pat<(add (mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (oneuseop (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),(nxv4i32 (AArch64dup(i32 GPR32:$Rn)))), + def : Pat<(add (mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),(nxv4i32 (AArch64dup(i32 GPR32:$Rn)))), (!cast<Instruction>(NAME # "_S") GPR32:$Rn, GPR32:$Rm)>; - def : Pat<(add (mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (oneuseop (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),(nxv2i64 (AArch64dup(i64 GPR64:$Rn)))), + def : Pat<(add (mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),(nxv2i64 (AArch64dup(i64 GPR64:$Rn)))), (!cast<Instruction>(NAME # "_D") GPR64:$Rn, GPR64:$Rm)>; } diff --git a/llvm/test/CodeGen/AArch64/sve-stepvector.ll b/llvm/test/CodeGen/AArch64/sve-stepvector.ll --- a/llvm/test/CodeGen/AArch64/sve-stepvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-stepvector.ll @@ -246,6 +246,86 @@ ret <vscale x 8 x i8> %3 } +define <vscale x 2 x i64> @mul_stepvector_nxv2i64() { +; CHECK-LABEL: mul_stepvector_nxv2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #2222 +; CHECK-NEXT: index z0.d, #0, x8 +; CHECK-NEXT: ret +entry: + %0 = insertelement <vscale x 2 x i64> poison, i64 2222, i32 0 + %1 = shufflevector <vscale x 2 x i64> %0, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %2 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64() + %3 = mul <vscale x 2 x i64> %2, %1 + ret <vscale x 2 x i64> %3 +} + +define <vscale x 2 x i64> @mul_stepvector_bigconst_nxv2i64() { 
+; CHECK-LABEL: mul_stepvector_bigconst_nxv2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x8, #146028888064 +; CHECK-NEXT: index z0.d, #0, x8 +; CHECK-NEXT: ret +entry: + %0 = insertelement <vscale x 2 x i64> poison, i64 146028888064, i32 0 + %1 = shufflevector <vscale x 2 x i64> %0, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %2 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64() + %3 = mul <vscale x 2 x i64> %2, %1 + ret <vscale x 2 x i64> %3 +} + +define <vscale x 2 x i64> @mul_add_stepvector_nxv2i64(i64 %x) { +; CHECK-LABEL: mul_add_stepvector_nxv2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #2222 +; CHECK-NEXT: index z0.d, x0, x8 +; CHECK-NEXT: ret +entry: + %0 = insertelement <vscale x 2 x i64> poison, i64 2222, i32 0 + %1 = shufflevector <vscale x 2 x i64> %0, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %2 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64() + %3 = mul <vscale x 2 x i64> %2, %1 + %4 = insertelement <vscale x 2 x i64> poison, i64 %x, i32 0 + %5 = shufflevector <vscale x 2 x i64> %4, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %6 = add <vscale x 2 x i64> %3, %5 + ret <vscale x 2 x i64> %6 +} + +define <vscale x 2 x i64> @mul_add_stepvector_bigconst_nxv2i64(i64 %x) { +; CHECK-LABEL: mul_add_stepvector_bigconst_nxv2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x8, #146028888064 +; CHECK-NEXT: index z0.d, x0, x8 +; CHECK-NEXT: ret +entry: + %0 = insertelement <vscale x 2 x i64> poison, i64 146028888064, i32 0 + %1 = shufflevector <vscale x 2 x i64> %0, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %2 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64() + %3 = mul <vscale x 2 x i64> %2, %1 + %4 = insertelement <vscale x 2 x i64> poison, i64 %x, i32 0 + %5 = shufflevector <vscale x 2 x i64> %4, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %6 = add <vscale x 2 x i64> %3, %5 + ret <vscale x 2 x i64> %6 +} + +define <vscale x 2 x i64> @mul_mul_add_stepvector_nxv2i64(i64 %x, i64 %y) { +; CHECK-LABEL: mul_mul_add_stepvector_nxv2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add x8, x0, x0, lsl #1 +; CHECK-NEXT: index z0.d, x1, x8 +; CHECK-NEXT: ret +entry: + %xmul = mul i64 %x, 3 + %0 = insertelement <vscale x 2 x i64> poison, i64 %xmul, i32 0 + %1 = shufflevector <vscale x 2 x i64> %0, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %2 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64() + %3 = mul <vscale x 2 x i64> %2, %1 + %4 = insertelement <vscale x 2 x i64> poison, i64 %y, i32 0 + %5 = shufflevector <vscale x 2 x i64> %4, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %6 = add <vscale x 2 x i64> %3, %5 + ret <vscale x 2 x i64> %6 +} + define <vscale x 8 x i8> @shl_stepvector_nxv8i8() { ; CHECK-LABEL: 
shl_stepvector_nxv8i8: ; CHECK: // %bb.0: // %entry