Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6995,6 +6995,19 @@
 defm USRA     : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
                 TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))>>;
 
+// The i32 result of a v8i8 uaddlv is at most 8 * 255, so the mask with 0xffff
+// is redundant; perform the rounding shift directly on the D register instead
+// of moving the value through the GPRs.
+def : Pat<(i64 (AArch64urshri
+                 (i64 (zext
+                   (i32 (and
+                     (i32 (int_aarch64_neon_uaddlv (v8i8 V64:$Rn))), (i32 65535))))),
+                 (i32 vecshiftR64:$imm))),
+          (i64 (URSHRd
+                 (EXTRACT_SUBREG
+                   (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+                     (UADDLVv8i8v V64:$Rn), hsub), dsub),
+                 vecshiftR64:$imm))>;
+
 //----------------------------------------------------------------------------
 // AdvSIMD vector shift instructions
 //----------------------------------------------------------------------------
Index: llvm/test/CodeGen/AArch64/remove-and-fmov-between-uaddlv-urshl.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/remove-and-fmov-between-uaddlv-urshl.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+define <8 x i8> @test1(<8 x i8> noundef %a) {
+; CHECK-LABEL: test1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uaddlv h0, v0.8b
+; CHECK-NEXT:    urshr d0, d0, #3
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    dup v0.8b, w8
+; CHECK-NEXT:    ret
+entry:
+  %vaddlv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a)
+  %0 = and i32 %vaddlv.i, 65535
+  %conv = zext i32 %0 to i64
+  %vrshr_n = tail call i64 @llvm.aarch64.neon.urshl.i64(i64 %conv, i64 -3)
+  %conv1 = trunc i64 %vrshr_n to i8
+  %vecinit.i = insertelement <8 x i8> undef, i8 %conv1, i64 0
+  %vecinit7.i = shufflevector <8 x i8> %vecinit.i, <8 x i8> poison, <8 x i32> zeroinitializer
+  ret <8 x i8> %vecinit7.i
+}
+
+declare i64 @llvm.aarch64.neon.urshl.i64(i64, i64) #1
+declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8>) #1
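
For context, IR of the shape in the test is what Clang typically emits for an ACLE intrinsic sequence like the sketch below (the function name and source are an assumption for illustration, not taken from the patch): `vaddlv_u8` returns a `uint16_t`, which is why the front end inserts the `and` with 65535, and `vrshrd_n_u64` is lowered to `@llvm.aarch64.neon.urshl.i64` with a negated shift amount.

```c
#include <arm_neon.h>

// Hypothetical source for the folded pattern: vaddlv_u8 yields a uint16_t sum,
// which gets masked with 0xffff and zero-extended before the scalar rounding
// shift; the new ISel pattern keeps the value in the SIMD registers throughout.
uint8x8_t fold_example(uint8x8_t a) {
  uint16_t sum = vaddlv_u8(a);          // -> uaddlv h0, v0.8b
  uint64_t avg = vrshrd_n_u64(sum, 3);  // -> urshr d0, d0, #3 (with this patch)
  return vdup_n_u8((uint8_t)avg);       // -> dup v0.8b, w8
}
```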