diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6347,12 +6347,27 @@
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
             (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
           ssub))>;
-
   def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
         (i64 (EXTRACT_SUBREG
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
             (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
           dsub))>;
+
+def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (intOp (v8i8 V64:$Rm))), imm:$Immd)),
+          (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF),
+            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rm), hsub), 0)>;
+def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (intOp (v16i8 V128:$Rm))), imm:$Immd)),
+          (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF),
+            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rm), hsub), 0)>;
+def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (intOp (v4i16 V64:$Rm))), imm:$Immd)),
+          (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF),
+            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rm), ssub), 0)>;
+def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (intOp (v8i16 V128:$Rm))), imm:$Immd)),
+          (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF),
+            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rm), ssub), 0)>;
+def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (intOp (v4i32 V128:$Rm))), imm:$Immd)),
+          (INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF),
+            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rm), dsub), 0)>;
 }
 
 defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
diff --git a/llvm/test/CodeGen/AArch64/neon-scalar-to-vector.ll b/llvm/test/CodeGen/AArch64/neon-scalar-to-vector.ll
--- a/llvm/test/CodeGen/AArch64/neon-scalar-to-vector.ll
+++ b/llvm/test/CodeGen/AArch64/neon-scalar-to-vector.ll
@@ -13,8 +13,7 @@
 ; CHECK-NEXT:    movi.2d v0, #0000000000000000
 ; CHECK-NEXT:    movi d1, #0000000000000000
 ; CHECK-NEXT:    uaddlv.8h s0, v0
-; CHECK-NEXT:    fmov w8, s0
-; CHECK-NEXT:    mov.s v1[0], w8
+; CHECK-NEXT:    mov.s v1[0], v0[0]
 ; CHECK-NEXT:    ucvtf.2s v0, v1
 ; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
@@ -32,8 +31,7 @@
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    movi.2d v0, #0000000000000000
 ; CHECK-NEXT:    uaddlv.8h s1, v0
-; CHECK-NEXT:    fmov w8, s1
-; CHECK-NEXT:    mov.s v0[0], w8
+; CHECK-NEXT:    mov.s v0[0], v1[0]
 ; CHECK-NEXT:    ucvtf.4s v0, v0
 ; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
@@ -50,12 +48,11 @@
 ; CHECK-LABEL: _insert_vec_v16i32_uaddlv_from_v8i16:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    movi.2d v0, #0000000000000000
-; CHECK-NEXT:    movi.2d v2, #0000000000000000
-; CHECK-NEXT:    uaddlv.8h s1, v0
+; CHECK-NEXT:    movi.2d v1, #0000000000000000
+; CHECK-NEXT:    uaddlv.8h s2, v0
 ; CHECK-NEXT:    stp q0, q0, [x0, #32]
-; CHECK-NEXT:    fmov w8, s1
-; CHECK-NEXT:    mov.s v2[0], w8
-; CHECK-NEXT:    ucvtf.4s v1, v2
+; CHECK-NEXT:    mov.s v1[0], v2[0]
+; CHECK-NEXT:    ucvtf.4s v1, v1
 ; CHECK-NEXT:    stp q1, q0, [x0]
 ; CHECK-NEXT:    ret
 
@@ -71,16 +68,15 @@
 ; CHECK-LABEL: _insert_vec_v23i32_uaddlv_from_v8i16:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    movi.2d v0, #0000000000000000
-; CHECK-NEXT:    movi.2d v2, #0000000000000000
-; CHECK-NEXT:    uaddlv.8h s1, v0
+; CHECK-NEXT:    add x8, x0, #88
+; CHECK-NEXT:    movi.2d v1, #0000000000000000
+; CHECK-NEXT:    uaddlv.8h s2, v0
 ; CHECK-NEXT:    stp q0, q0, [x0, #16]
 ; CHECK-NEXT:    stp q0, q0, [x0, #48]
-; CHECK-NEXT:    str d0, [x0, #80]
-; CHECK-NEXT:    fmov w8, s1
-; CHECK-NEXT:    mov.s v2[0], w8
-; CHECK-NEXT:    add x8, x0, #88
 ; CHECK-NEXT:    st1.s { v0 }[2], [x8]
-; CHECK-NEXT:    ucvtf.4s v1, v2
+; CHECK-NEXT:    mov.s v1[0], v2[0]
+; CHECK-NEXT:    str d0, [x0, #80]
+; CHECK-NEXT:    ucvtf.4s v1, v1
 ; CHECK-NEXT:    str q1, [x0]
 ; CHECK-NEXT:    ret
 
@@ -98,8 +94,7 @@
 ; CHECK-NEXT:    movi.2d v0, #0000000000000000
 ; CHECK-NEXT:    movi d1, #0000000000000000
 ; CHECK-NEXT:    uaddlv.16b h0, v0
-; CHECK-NEXT:    fmov w8, s0
-; CHECK-NEXT:    mov.s v1[0], w8
+; CHECK-NEXT:    mov.s v1[0], v0[0]
 ; CHECK-NEXT:    ucvtf.2s v0, v1
 ; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
@@ -118,8 +113,7 @@
 ; CHECK-NEXT:    movi.2d v0, #0000000000000000
 ; CHECK-NEXT:    movi d1, #0000000000000000
 ; CHECK-NEXT:    uaddlv.8b h0, v0
-; CHECK-NEXT:    fmov w8, s0
-; CHECK-NEXT:    mov.s v1[0], w8
+; CHECK-NEXT:    mov.s v1[0], v0[0]
 ; CHECK-NEXT:    ucvtf.2s v0, v1
 ; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
@@ -138,8 +132,7 @@
 ; CHECK-NEXT:    movi.2d v0, #0000000000000000
 ; CHECK-NEXT:    movi d1, #0000000000000000
 ; CHECK-NEXT:    uaddlv.4h s0, v0
-; CHECK-NEXT:    fmov w8, s0
-; CHECK-NEXT:    mov.s v1[0], w8
+; CHECK-NEXT:    mov.s v1[0], v0[0]
 ; CHECK-NEXT:    ucvtf.2s v0, v1
 ; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
@@ -157,8 +150,7 @@
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    movi.2d v0, #0000000000000000
 ; CHECK-NEXT:    uaddlv.4s d1, v0
-; CHECK-NEXT:    fmov x8, d1
-; CHECK-NEXT:    mov.d v0[0], x8
+; CHECK-NEXT:    mov.d v0[0], v1[0]
 ; CHECK-NEXT:    ucvtf.2d v0, v0
 ; CHECK-NEXT:    fcvtn v0.2s, v0.2d
 ; CHECK-NEXT:    str d0, [x0]
@@ -177,8 +169,7 @@
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    movi.2d v0, #0000000000000000
 ; CHECK-NEXT:    uaddlv.4s d1, v0
-; CHECK-NEXT:    fmov x8, d1
-; CHECK-NEXT:    mov.d v0[0], x8
+; CHECK-NEXT:    mov.d v0[0], v1[0]
 ; CHECK-NEXT:    ucvtf.2d v0, v0
 ; CHECK-NEXT:    fcvtn v0.2s, v0.2d
 ; CHECK-NEXT:    str d0, [x0]
@@ -199,8 +190,7 @@
 ; CHECK-NEXT:    str wzr, [x0, #16]
 ; CHECK-NEXT:    movi d0, #0000000000000000
 ; CHECK-NEXT:    uaddlv.4s d2, v1
-; CHECK-NEXT:    fmov x8, d2
-; CHECK-NEXT:    mov.d v1[0], x8
+; CHECK-NEXT:    mov.d v1[0], v2[0]
 ; CHECK-NEXT:    ucvtf.2d v1, v1
 ; CHECK-NEXT:    fcvtn v1.2s, v1.2d
 ; CHECK-NEXT:    mov.d v1[1], v0[0]
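Note: the new patterns keep an across-lanes [su]addlv result in the SIMD register file, selecting INS (element) for a lane insert instead of round-tripping through a GPR with fmov + INS (general). Below is a minimal IR sketch that should exercise the v8i16 pattern; the function name is illustrative and not part of the patch, while the llvm.aarch64.neon.uaddlv intrinsic is the one the patterns match.

; Illustrative reproducer (not from the patch). With the INSvi32lane
; pattern above, the lane insert of the uaddlv result should select to an
; INS (element) such as "mov.s v0[0], v1[0]" rather than "fmov w8, s1"
; followed by "mov.s v0[0], w8", matching the test updates above.
define <4 x i32> @insert_uaddlv_v8i16(<8 x i16> %v, <4 x i32> %acc) {
entry:
  ; Unsigned add across the eight i16 lanes, widening to i32; uaddlv
  ; produces the result in an s-register.
  %sum = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %v)
  ; Lane insert of that scalar: this is the vector_insert node the new
  ; TableGen patterns match.
  %ins = insertelement <4 x i32> %acc, i32 %sum, i64 0
  ret <4 x i32> %ins
}

declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16>)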