# Patch summary (commentary only; placed before the first "Index:" header).
#
# llvm/lib/Target/AArch64/AArch64InstrInfo.td
#   Adds one anonymous `Pat` after the FMINNMV / FMINV across-lanes defms.
#   Source DAG:  (v4i32 (AArch64uaddv (v4i32 (AArch64uaddlp (v8i16 V128:$op)))))
#   Result:      (UADDLVv8i16v V128:$op) inserted into the `ssub` subregister
#                of a v4i32 IMPLICIT_DEF.
#   NOTE(review): because the result is built on IMPLICIT_DEF, only the ssub
#   part is written by this pattern; this appears to rely on the convention
#   described in the adjacent context comment ("returns a vector (with only
#   the low lane defined)") -- confirm AArch64uaddv follows that convention.
#
# llvm/test/CodeGen/AArch64/arm64-vabs.ll (2 hunks)
# llvm/test/CodeGen/AArch64/neon-sad.ll   (2 hunks)
#   In each hunk the expected `uaddlp` + `addv` pair after the
#   uabdl/uabal2 (or sabdl/sabal2) sequence is replaced by a single `uaddlv`
#   on the .8h source; the following `fmov w0, s0` / `ret` lines are unchanged.
#
# NOTE(review): the patch text below has its original line breaks collapsed to
# spaces; it is kept verbatim and would need re-splitting before being applied.
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5653,6 +5653,9 @@ defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>; +def : Pat<(v4i32 (AArch64uaddv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))), + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (UADDLVv8i16v V128:$op), ssub)>; + // Patterns for across-vector intrinsics, that have a node equivalent, that // returns a vector (with only the low lane defined) instead of a scalar. // In effect, opNode is the same as (scalar_to_vector (IntNode)). Index: llvm/test/CodeGen/AArch64/arm64-vabs.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -220,8 +220,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: uabdl.8h v2, v0, v1 ; CHECK-NEXT: uabal2.8h v2, v0, v1 -; CHECK-NEXT: uaddlp.4s v0, v2 -; CHECK-NEXT: addv.4s s0, v0 +; CHECK-NEXT: uaddlv.8h s0, v2 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %aext = zext <16 x i8> %a to <16 x i32> @@ -239,8 +238,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: sabdl.8h v2, v0, v1 ; CHECK-NEXT: sabal2.8h v2, v0, v1 -; CHECK-NEXT: uaddlp.4s v0, v2 -; CHECK-NEXT: addv.4s s0, v0 +; CHECK-NEXT: uaddlv.8h s0, v2 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %aext = sext <16 x i8> %a to <16 x i32> Index: llvm/test/CodeGen/AArch64/neon-sad.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-sad.ll +++ llvm/test/CodeGen/AArch64/neon-sad.ll @@ -11,8 +11,7 @@ ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: uabdl v2.8h, v1.8b, v0.8b ; CHECK-NEXT: uabal2 v2.8h, v1.16b, v0.16b -; CHECK-NEXT: uaddlp v0.4s, v2.8h -; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: uaddlv s0, v2.8h ; CHECK-NEXT: fmov w0, s0 ; 
CHECK-NEXT: ret entry: @@ -35,8 +34,7 @@ ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: sabdl v2.8h, v1.8b, v0.8b ; CHECK-NEXT: sabal2 v2.8h, v1.16b, v0.16b -; CHECK-NEXT: uaddlp v0.4s, v2.8h -; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: uaddlv s0, v2.8h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: