Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5858,24 +5858,29 @@
 defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
 defm FMINV   : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
 
-// Patterns for uaddv(uaddlp(x)) ==> uaddlv
-def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
-            (v4i16 (AArch64uaddv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))),
-            (i64 0))), (i64 0))),
-          (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
-            (UADDLVv8i8v V64:$op), hsub), ssub)>;
-def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (AArch64uaddlp
-           (v16i8 V128:$op))))), (i64 0))),
-          (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
-           (UADDLVv16i8v V128:$op), hsub), ssub)>;
-def : Pat<(v4i32 (AArch64uaddv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
-          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (UADDLVv8i16v V128:$op), ssub)>;
-
-// Patterns for addp(uaddlp(x))) ==> uaddlv
-def : Pat<(v2i32 (AArch64uaddv (v2i32 (AArch64uaddlp (v4i16 V64:$op))))),
-          (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (UADDLVv4i16v V64:$op), ssub)>;
-def : Pat<(v2i64 (AArch64uaddv (v2i64 (AArch64uaddlp (v4i32 V128:$op))))),
-          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (UADDLVv4i32v V128:$op), dsub)>;
+multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> {
+  // Patterns for uaddv(uaddlp(x)) ==> uaddlv
+  def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
+              (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))),
+              (i64 0))), (i64 0))),
+            (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+              (!cast<Instruction>(Opc#"v8i8v") V64:$op), hsub), ssub)>;
+  def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (addlp
+             (v16i8 V128:$op))))), (i64 0))),
+            (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+             (!cast<Instruction>(Opc#"v16i8v") V128:$op), hsub), ssub)>;
+  def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))),
+            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v8i16v") V128:$op), ssub)>;
+
+  // Patterns for addp(uaddlp(x))) ==> uaddlv
+  def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))),
+            (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i16v") V64:$op), ssub)>;
+  def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))),
+            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i32v") V128:$op), dsub)>;
+}
+
+defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
+defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;
 
 // Patterns for across-vector intrinsics, that have a node equivalent, that
 // returns a vector (with only the low lane defined) instead of a scalar.
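
Note (illustrative, not part of the patch): the IR shape the new SADDLV instantiation targets is a sign-extending vector add reduction, as exercised by the tests updated below. A minimal LLVM IR sketch, mirroring add_v4i32_v4i64_sext (the function name here is made up for illustration):

; Sketch: with the SADDLV patterns above, this reduction selects to a single
; "saddlv d0, v0.4s" instead of "saddlp v0.2d, v0.4s" followed by "addp d0, v0.2d".
define i64 @reduce_sext_v4i32(<4 x i32> %x) {
entry:
  %xx = sext <4 x i32> %x to <4 x i64>
  %r = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
  ret i64 %r
}
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
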
Index: llvm/test/CodeGen/AArch64/vecreduce-add.ll
===================================================================
--- llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -28,8 +28,7 @@
 define i64 @add_v4i32_v4i64_sext(<4 x i32> %x) {
 ; CHECK-LABEL: add_v4i32_v4i64_sext:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    saddlp v0.2d, v0.4s
-; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    saddlv d0, v0.4s
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
 entry:
@@ -79,8 +78,7 @@
 define i32 @add_v8i16_v8i32_sext(<8 x i16> %x) {
 ; CHECK-LABEL: add_v8i16_v8i32_sext:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    saddlp v0.4s, v0.8h
-; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    saddlv s0, v0.8h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
 entry:
@@ -177,8 +175,7 @@
 ; CHECK-LABEL: add_v4i16_v4i64_sext:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-NEXT:    saddlp v0.2d, v0.4s
-; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    saddlv d0, v0.4s
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
 entry:
@@ -295,8 +292,7 @@
 ; CHECK-BASE-LABEL: add_v8i8_v8i32_sext:
 ; CHECK-BASE:       // %bb.0: // %entry
 ; CHECK-BASE-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT:    saddlp v0.4s, v0.8h
-; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    saddlv s0, v0.8h
 ; CHECK-BASE-NEXT:    fmov w0, s0
 ; CHECK-BASE-NEXT:    ret
 ;
@@ -596,8 +592,7 @@
 define i64 @add_v4i32_v4i64_acc_sext(<4 x i32> %x, i64 %a) {
 ; CHECK-LABEL: add_v4i32_v4i64_acc_sext:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    saddlp v0.2d, v0.4s
-; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    saddlv d0, v0.4s
 ; CHECK-NEXT:    fmov x8, d0
 ; CHECK-NEXT:    add x0, x8, x0
 ; CHECK-NEXT:    ret
@@ -655,8 +650,7 @@
 define i32 @add_v8i16_v8i32_acc_sext(<8 x i16> %x, i32 %a) {
 ; CHECK-LABEL: add_v8i16_v8i32_acc_sext:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    saddlp v0.4s, v0.8h
-; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    saddlv s0, v0.8h
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    add w0, w8, w0
 ; CHECK-NEXT:    ret
@@ -768,8 +762,7 @@
 ; CHECK-LABEL: add_v4i16_v4i64_acc_sext:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-NEXT:    saddlp v0.2d, v0.4s
-; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    saddlv d0, v0.4s
 ; CHECK-NEXT:    fmov x8, d0
 ; CHECK-NEXT:    add x0, x8, x0
 ; CHECK-NEXT:    ret
@@ -901,8 +894,7 @@
 ; CHECK-BASE-LABEL: add_v8i8_v8i32_acc_sext:
 ; CHECK-BASE:       // %bb.0: // %entry
 ; CHECK-BASE-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT:    saddlp v0.4s, v0.8h
-; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    saddlv s0, v0.8h
 ; CHECK-BASE-NEXT:    fmov w8, s0
 ; CHECK-BASE-NEXT:    add w0, w8, w0
 ; CHECK-BASE-NEXT:    ret
@@ -974,8 +966,7 @@
 define signext i16 @add_v16i8_v16i16_acc_sext(<16 x i8> %x, i16 %a) {
 ; CHECK-LABEL: add_v16i8_v16i16_acc_sext:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    saddlp v0.8h, v0.16b
-; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    saddlv h0, v0.16b
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    add w8, w8, w0
 ; CHECK-NEXT:    sxth w0, w8
@@ -1695,10 +1686,8 @@
 define signext i16 @add_pair_v16i8_v16i16_sext(<16 x i8> %x, <16 x i8> %y) {
 ; CHECK-LABEL: add_pair_v16i8_v16i16_sext:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    saddlp v0.8h, v0.16b
-; CHECK-NEXT:    saddlp v1.8h, v1.16b
-; CHECK-NEXT:    addv h0, v0.8h
-; CHECK-NEXT:    addv h1, v1.8h
+; CHECK-NEXT:    saddlv h0, v0.16b
+; CHECK-NEXT:    saddlv h1, v1.16b
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    fmov w9, s1
 ; CHECK-NEXT:    add w8, w8, w9