Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6427,6 +6427,11 @@
 defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", AArch64fminnmv>;
 defm FMINV   : SIMDFPAcrossLanes<0b01111, 1, "fminv", AArch64fminv>;
 
+def : Pat<(v8i16 (AArch64dup (i32 (int_aarch64_neon_uaddlv (v8i8 V64:$Rn))))),
+          (v8i16 (DUPv8i16lane
+            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), (UADDLVv8i8v V64:$Rn), hsub),
+            (i64 0)))>;
+
 multiclass SIMDAcrossLaneLongPairIntrinsic {
   // Patterns for addv(addlp(x)) ==> addlv
   def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
Index: llvm/test/CodeGen/AArch64/neon-addlv.ll
===================================================================
--- llvm/test/CodeGen/AArch64/neon-addlv.ll
+++ llvm/test/CodeGen/AArch64/neon-addlv.ll
@@ -177,3 +177,21 @@
   %0 = and i32 %vaddlv.i, 65535
   ret i32 %0
 }
+
+define dso_local <8 x i8> @bar(<8 x i8> noundef %a) local_unnamed_addr #0 {
+; CHECK-LABEL: bar:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uaddlv h0, v0.8b
+; CHECK-NEXT:    dup v0.8h, v0.h[0]
+; CHECK-NEXT:    rshrn v0.8b, v0.8h, #3
+; CHECK-NEXT:    ret
+entry:
+  %vaddlv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a)
+  %0 = trunc i32 %vaddlv.i to i16
+  %vecinit.i = insertelement <8 x i16> undef, i16 %0, i64 0
+  %vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> poison, <8 x i32> zeroinitializer
+  %vrshrn_n2 = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %vecinit7.i, i32 3)
+  ret <8 x i8> %vrshrn_n2
+}
+
+declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32)
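
For context, a source-level illustration (my sketch, not part of the patch): C code like the following, compiled with arm_neon.h at -O2, produces the uaddlv + dup-splat + rshrn IR that the new pattern and the @bar test exercise. The function name and the shift amount of 3 are assumptions chosen to mirror the test.

#include <arm_neon.h>

// Illustrative reproducer: sums the eight u8 lanes (uaddlv), splats the
// 16-bit sum across a v8i16 (the dup the new pattern selects), then
// rounding-shift-narrows back to eight u8 lanes (rshrn #3).
uint8x8_t bar(uint8x8_t a) {
  uint16_t sum = vaddlv_u8(a);         // -> llvm.aarch64.neon.uaddlv
  uint16x8_t splat = vdupq_n_u16(sum); // -> insertelement + splat shuffle
  return vrshrn_n_u16(splat, 3);       // -> llvm.aarch64.neon.rshrn
}

Without the new Pat, the splat would typically be selected through a GPR copy of the i32 intrinsic result (fmov to a w-register, then dup from it); the pattern instead keeps the value in the SIMD register file by applying DUPv8i16lane directly to the UADDLV result via an hsub INSERT_SUBREG.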