diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1365,8 +1365,8 @@
   defm SQDECP_XP   : sve_int_count_r_x64<0b01010, "sqdecp", int_aarch64_sve_sqdecp_n64>;
   defm UQDECP_WP   : sve_int_count_r_u32<0b01100, "uqdecp", int_aarch64_sve_uqdecp_n32>;
   defm UQDECP_XP   : sve_int_count_r_x64<0b01110, "uqdecp", int_aarch64_sve_uqdecp_n64>;
-  defm INCP_XP     : sve_int_count_r_x64<0b10000, "incp">;
-  defm DECP_XP     : sve_int_count_r_x64<0b10100, "decp">;
+  defm INCP_XP     : sve_int_count_r_x64<0b10000, "incp", null_frag, add>;
+  defm DECP_XP     : sve_int_count_r_x64<0b10100, "decp", null_frag, sub>;

   defm SQINCP_ZP   : sve_int_count_v<0b00000, "sqincp", int_aarch64_sve_sqincp>;
   defm UQINCP_ZP   : sve_int_count_v<0b00100, "uqincp", int_aarch64_sve_uqincp>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -263,6 +263,11 @@
 def sve_cnt_mul_imm : ComplexPattern<i32, 1, "SelectCntImm<1, 16, 1, false>">;
 def sve_cnt_shl_imm : ComplexPattern<i32, 1, "SelectCntImm<1, 16, 1, true>">;

+def int_aarch64_sve_cntp_oneuse : PatFrag<(ops node:$pred, node:$src2),
+                                          (int_aarch64_sve_cntp node:$pred, node:$src2), [{
+  return N->hasOneUse();
+}]>;
+
 //===----------------------------------------------------------------------===//
 // SVE PTrue - These are used extensively throughout the pattern matching so
 // it's important we define them first.
@@ -664,7 +669,8 @@
 }

 multiclass sve_int_count_r_x64<bits<5> opc, string asm,
-                               SDPatternOperator op = null_frag> {
+                               SDPatternOperator op,
+                               SDPatternOperator combine_op = null_frag> {
   def _B : sve_int_count_r<0b00, opc, asm, GPR64z, PPR8, GPR64z>;
   def _H : sve_int_count_r<0b01, opc, asm, GPR64z, PPR16, GPR64z>;
   def _S : sve_int_count_r<0b10, opc, asm, GPR64z, PPR32, GPR64z>;
@@ -678,6 +684,16 @@
             (!cast<Instruction>(NAME # _S) PPRAny:$Pg, $Rn)>;
   def : Pat<(i64 (op GPR64:$Rn, (nxv2i1 PPRAny:$Pg))),
             (!cast<Instruction>(NAME # _D) PPRAny:$Pg, $Rn)>;
+
+  // Combine cntp with combine_op
+  def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv16i1 (SVEAllActive)), (nxv16i1 PPRAny:$pred)))),
+            (!cast<Instruction>(NAME # _B) PPRAny:$pred, $Rn)>;
+  def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv8i1 (SVEAllActive)), (nxv8i1 PPRAny:$pred)))),
+            (!cast<Instruction>(NAME # _H) PPRAny:$pred, $Rn)>;
+  def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv4i1 (SVEAllActive)), (nxv4i1 PPRAny:$pred)))),
+            (!cast<Instruction>(NAME # _S) PPRAny:$pred, $Rn)>;
+  def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv2i1 (SVEAllActive)), (nxv2i1 PPRAny:$pred)))),
+            (!cast<Instruction>(NAME # _D) PPRAny:$pred, $Rn)>;
 }

 class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
diff --git a/llvm/test/CodeGen/AArch64/sve-cntp-combine.ll b/llvm/test/CodeGen/AArch64/sve-cntp-combine.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-cntp-combine.ll
@@ -0,0 +1,169 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; INCP
+
+define i64 @cntp_add_nxv16i1(i64 %x, <vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_add_nxv16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    incp x0, p0.b
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %pg)
+  %add = add i64 %2, %x
+  ret i64 %add
+}
+
+define i64 @cntp_add_nxv8i1(i64 %x, <vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_add_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    incp x0, p0.h
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %pg)
+  %add = add i64 %2, %x
+  ret i64 %add
+}
+
+define i64 @cntp_add_nxv4i1(i64 %x, <vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_add_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    incp x0, p0.s
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %pg)
+  %add = add i64 %2, %x
+  ret i64 %add
+}
+
+define i64 @cntp_add_nxv2i1(i64 %x, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_add_nxv2i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    incp x0, p0.d
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
+  %add = add i64 %2, %x
+  ret i64 %add
+}
+
+define i64 @cntp_add_all_active_nxv8i1(i64 %x, <vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_add_all_active_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    incp x0, p0.h
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
+  %3 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %2, <vscale x 8 x i1> %pg)
+  %add = add i64 %3, %x
+  ret i64 %add
+}
+
+define i64 @cntp_add_nxv2i1_multiuse(i64 %x, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_add_nxv2i1_multiuse:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    cntp x8, p1, p0.d
+; CHECK-NEXT:    add x9, x8, x0
+; CHECK-NEXT:    madd x0, x8, x0, x9
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
+  %add = add i64 %2, %x
+  %mul = mul i64 %2, %x
+  %res = add i64 %add, %mul
+  ret i64 %res
+}
+
+; DECP
+
+define i64 @cntp_sub_nxv16i1(i64 %x, <vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_sub_nxv16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    decp x0, p0.b
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %pg)
+  %sub = sub i64 %x, %2
+  ret i64 %sub
+}
+
+define i64 @cntp_sub_nxv8i1(i64 %x, <vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_sub_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    decp x0, p0.h
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %pg)
+  %sub = sub i64 %x, %2
+  ret i64 %sub
+}
+
+define i64 @cntp_sub_nxv4i1(i64 %x, <vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_sub_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    decp x0, p0.s
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %pg)
+  %sub = sub i64 %x, %2
+  ret i64 %sub
+}
+
+define i64 @cntp_sub_nxv2i1(i64 %x, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_sub_nxv2i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    decp x0, p0.d
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
+  %sub = sub i64 %x, %2
+  ret i64 %sub
+}
+
+define i64 @cntp_sub_all_active_nxv8i1(i64 %x, <vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_sub_all_active_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    decp x0, p0.h
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
+  %3 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %2, <vscale x 8 x i1> %pg)
+  %sub = sub i64 %x, %3
+  ret i64 %sub
+}
+
+define i64 @cntp_sub_nxv2i1_multiuse(i64 %x, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_sub_nxv2i1_multiuse:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    cntp x8, p1, p0.d
+; CHECK-NEXT:    sub x9, x8, x0
+; CHECK-NEXT:    madd x0, x8, x0, x9
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
+  %sub = sub i64 %2, %x
+  %mul = mul i64 %2, %x
+  %res = add i64 %sub, %mul
+  ret i64 %res
+}
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
+
+declare i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
+declare i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
+declare i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
+
+attributes #0 = { "target-features"="+sve" }