Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1201,6 +1201,25 @@
             (ADR_LSL_ZZZ_D_2 $Op1, $Op2)>;
   def : Pat<(nxv2i64 (int_aarch64_sve_adrd nxv2i64:$Op1, nxv2i64:$Op2)),
             (ADR_LSL_ZZZ_D_3 $Op1, $Op2)>;
+
+  // Fold a scalar i32 sqadd of lane 0 of a v4i16 sqdmull into the by-element
+  // SQDMLAL: the i32 accumulator is placed in the ssub lane of the vector
+  // accumulator, the second multiplicand is inserted into a v8i16 register so
+  // it can be indexed at lane 0, and the ssub lane of the result is extracted.
+  // This pattern only uses NEON intrinsics and instructions, so it overrides
+  // the enclosing HasSVE predicate.
+  // NOTE(review): consider moving it next to the NEON patterns.
+  let Predicates = [HasNEON] in
+  def : Pat<(i32 (int_aarch64_neon_sqadd (i32 FPR32Op:$Rd),
+                 (i32 (vector_extract (v4i32 (int_aarch64_neon_sqdmull
+                                              (v4i16 V64:$Rm),
+                                              (v4i16 V64:$Rn))),
+                                      (i64 0))))),
+            (EXTRACT_SUBREG (SQDMLALv4i16_indexed
+                             (SUBREG_TO_REG (i32 0), FPR32Op:$Rd, ssub),
+                             V64:$Rm, (INSERT_SUBREG
+                             (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub),
+                             (i64 0)), ssub)>;
 } // End HasSVE
 
 let Predicates = [HasSVEorStreamingSVE] in {
Index: llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
+++ llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
@@ -52,6 +52,8 @@
 
 declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
 
+declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32)
+
 declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
 
 declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
@@ -3235,6 +3237,25 @@
   ret <4 x i32> %vqdmlal4.i
 }
 
+; Scalar sqadd of lane 0 of a sqdmull should select the by-element sqdmlal.
+define i32 @test_vqdmlal_lane_s16_0_i32(i32 %a, i16 %b, <4 x i16> %c) {
+; CHECK-LABEL: test_vqdmlal_lane_s16_0_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov s1, w1
+; CHECK-NEXT:    fmov s2, w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    sqdmlal v2.4s, v1.4h, v0.h[0]
+; CHECK-NEXT:    fmov w0, s2
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <4 x i16> undef, i16 %b, i64 0
+  %1 = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+  %vqdmlXl = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %0, <4 x i16> %1)
+  %lane0 = extractelement <4 x i32> %vqdmlXl, i64 0
+  %vqdmlXl1 = tail call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %lane0)
+  ret i32 %vqdmlXl1
+}
+
 define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmlal_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry