diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -272,7 +272,6 @@
 def sve_cnt_mul_imm_i64 : ComplexPattern<i64, 1, "SelectCntImm<1, 16, 1, false>">;
 def sve_cnt_shl_imm : ComplexPattern<i32, 1, "SelectCntImm<1, 16, 1, true>">;
-
 def sve_ext_imm_0_31 : ComplexPattern<i32, 1, "SelectEXTImm<31, 8>">;
 def sve_ext_imm_0_63 : ComplexPattern<i32, 1, "SelectEXTImm<63, 4>">;
 def sve_ext_imm_0_127 : ComplexPattern<i32, 1, "SelectEXTImm<127, 2>">;
@@ -880,7 +879,7 @@
   def : Pat<(i64 (mul (op sve_pred_enum:$pattern), (sve_cnt_mul_imm_i64 i32:$imm))),
             (!cast<Instruction>(NAME) sve_pred_enum:$pattern, sve_incdec_imm:$imm)>;
 
-  def : Pat<(i64 (shl (op sve_pred_enum:$pattern), (i64 (sve_cnt_shl_imm i32:$imm)))),
+  def : Pat<(i64 (shl (op sve_pred_enum:$pattern), (sve_cnt_shl_imm i32:$imm))),
             (!cast<Instruction>(NAME) sve_pred_enum:$pattern, sve_incdec_imm:$imm)>;
 
   def : Pat<(i64 (op sve_pred_enum:$pattern)),
@@ -963,7 +962,7 @@
   def : Pat<(i64 (op GPR64:$Rdn, (mul (opcnt sve_pred_enum:$pattern), (sve_cnt_mul_imm_i64 i32:$imm)))),
             (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, $imm)>;
 
-  def : Pat<(i64 (op GPR64:$Rdn, (shl (opcnt sve_pred_enum:$pattern), (i64 (sve_cnt_shl_imm i32:$imm))))),
+  def : Pat<(i64 (op GPR64:$Rdn, (shl (opcnt sve_pred_enum:$pattern), (sve_cnt_shl_imm i32:$imm)))),
             (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, $imm)>;
 
   def : Pat<(i32 (op GPR32:$Rdn, (i32 (trunc (opcnt (sve_pred_enum:$pattern)))))),
@@ -976,7 +975,7 @@
                                    GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, $imm),
                   sub_32))>;
 
-  def : Pat<(i32 (op GPR32:$Rdn, (shl (i32 (trunc (opcnt (sve_pred_enum:$pattern)))), (i64 (sve_cnt_shl_imm i32:$imm))))),
+  def : Pat<(i32 (op GPR32:$Rdn, (shl (i32 (trunc (opcnt (sve_pred_enum:$pattern)))), (sve_cnt_shl_imm i32:$imm)))),
             (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
                                    GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, $imm),
                   sub_32))>;
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll
@@ -0,0 +1,352 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -check-prefix=NO_SCALAR_INC
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-scalar-inc-vl < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+; INCB
+
+define i32 @incb(i32 %a) {
+; NO_SCALAR_INC-LABEL: incb:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cntb x8, vl5
+; NO_SCALAR_INC-NEXT: add w0, w8, w0
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: incb:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: incb x0, vl5
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+  %cnt = call i64 @llvm.aarch64.sve.cntb(i32 5)
+  %conv = trunc i64 %cnt to i32
+  %out = add i32 %conv, %a
+  ret i32 %out
+}
+
+define i32 @incb_mul(i32 %a) {
+; NO_SCALAR_INC-LABEL: incb_mul:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cntb x8, vl8
+; NO_SCALAR_INC-NEXT: add w0, w0, w8, lsl #2
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: incb_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: incb x0, vl8, mul #4
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+  %cnt = call i64 @llvm.aarch64.sve.cntb(i32 8)
+  %conv = trunc i64 %cnt to i32
+  %mul = mul i32 %conv, 4
+  %out = add i32 %mul, %a
+  ret i32 %out
+}
+
+;
+; DECB
+;
+
+define i32 @decb(i32 %a) {
+; NO_SCALAR_INC-LABEL: decb:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cntb x8, vl6
+; NO_SCALAR_INC-NEXT: sub w0, w0, w8
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: decb:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: decb x0, vl6
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+  %cnt = call i64 @llvm.aarch64.sve.cntb(i32 6)
+  %conv = trunc i64 %cnt to i32
+  %out = sub i32 %a, %conv
+  ret i32 %out
+}
+
+define i32 @decb_mul(i32 %a) {
+; NO_SCALAR_INC-LABEL: decb_mul:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cntb x8, vl6
+; NO_SCALAR_INC-NEXT: sub w0, w0, w8, lsl #3
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: decb_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: decb x0, vl6, mul #8
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+  %cnt = call i64 @llvm.aarch64.sve.cntb(i32 6)
+  %conv = trunc i64 %cnt to i32
+  %mul = mul i32 %conv, 8
+  %out = sub i32 %a, %mul
+  ret i32 %out
+}
+
+; INCH
+
+define i32 @inch(i32 %a) {
+; NO_SCALAR_INC-LABEL: inch:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cnth x8, #16
+; NO_SCALAR_INC-NEXT: add w0, w8, w0
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: inch:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: inch x0, #16
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+  %cnt = call i64 @llvm.aarch64.sve.cnth(i32 16)
+  %conv = trunc i64 %cnt to i32
+  %out = add i32 %conv, %a
+  ret i32 %out
+}
+
+define i32 @inch_mul(i32 %a) {
+; NO_SCALAR_INC-LABEL: inch_mul:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: mov w8, #5
+; NO_SCALAR_INC-NEXT: cnth x9, vl8
+; NO_SCALAR_INC-NEXT: madd w0, w9, w8, w0
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: inch_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: inch x0, vl8, mul #5
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+  %cnt = call i64 @llvm.aarch64.sve.cnth(i32 8)
+  %conv = trunc i64 %cnt to i32
+  %mul = mul i32 %conv, 5
+  %out = add i32 %mul, %a
+  ret i32 %out
+}
+
+;
+; DECH
+;
+
+define i32 @dech(i32 %a) {
+; NO_SCALAR_INC-LABEL: dech:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cnth x8, vl1
+; NO_SCALAR_INC-NEXT: sub w0, w0, w8
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: dech:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: dech x0, vl1
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+  %cnt = call i64 @llvm.aarch64.sve.cnth(i32 1)
+  %conv = trunc i64 %cnt to i32
+  %out = sub i32 %a, %conv
+  ret i32 %out
+}
+
+define i32 @dech_mul(i32 %a) {
+; NO_SCALAR_INC-LABEL: dech_mul:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: mov w8, #7
+; NO_SCALAR_INC-NEXT: cnth x9, vl16
+; NO_SCALAR_INC-NEXT: msub w0, w9, w8, w0
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: dech_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: dech x0, vl16, mul #7
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+  %cnt = call i64 @llvm.aarch64.sve.cnth(i32 9)
+  %conv = trunc i64 %cnt to i32
+  %mul = mul i32 %conv, 7
+  %out = sub i32 %a, %mul
+  ret i32 %out
+}
+
+;
+; INCW
+;
+
+define i32 @incw(i32 %a) {
+; NO_SCALAR_INC-LABEL: incw:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cntw x8, #16
+; NO_SCALAR_INC-NEXT: add w0, w8, w0
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: incw:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: incw x0, #16
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+  %cnt = call i64 @llvm.aarch64.sve.cntw(i32 16)
+  %conv = trunc i64 %cnt to i32
+  %out = add i32 %conv, %a
+  ret i32 %out
+}
+
+define i32 @incw_mul(i32 %a) {
+; NO_SCALAR_INC-LABEL: incw_mul:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: mov w8, #12
+; NO_SCALAR_INC-NEXT: cntw x9, vl32
+; NO_SCALAR_INC-NEXT: madd w0, w9, w8, w0
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: incw_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: incw x0, vl32, mul #12
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+  %cnt = call i64 @llvm.aarch64.sve.cntw(i32 10)
+  %conv = trunc i64 %cnt to i32
+  %mul = mul i32 %conv, 12
+  %out = add i32 %mul, %a
+  ret i32 %out
+}
+
+;
+; DECW
+;
+
+define i32 @decw(i32 %a) {
+; NO_SCALAR_INC-LABEL: decw:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cntw x8, vl64
+; NO_SCALAR_INC-NEXT: sub w0, w0, w8
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: decw:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: decw x0, vl64
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+  %cnt = call i64 @llvm.aarch64.sve.cntw(i32 11)
+  %conv = trunc i64 %cnt to i32
+  %out = sub i32 %a, %conv
+  ret i32 %out
+}
+
+define i32 @decw_mul(i32 %a) {
+; NO_SCALAR_INC-LABEL: decw_mul:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cntw x8, vl128
+; NO_SCALAR_INC-NEXT: sub w0, w0, w8, lsl #4
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: decw_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: decw x0, vl128, mul #16
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+  %cnt = call i64 @llvm.aarch64.sve.cntw(i32 12)
+  %conv = trunc i64 %cnt to i32
+  %mul = mul i32 %conv, 16
+  %out = sub i32 %a, %mul
+  ret i32 %out
+}
+
+; INCD
+
+define i32 @incd(i32 %base) {
+; NO_SCALAR_INC-LABEL: incd:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cntd x8, vl64
+; NO_SCALAR_INC-NEXT: add w0, w0, w8
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: incd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: incd x0, vl64
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+  %cnt = tail call i64 @llvm.aarch64.sve.cntd(i32 11)
+  %conv = trunc i64 %cnt to i32
+  %add = add i32 %base, %conv
+  ret i32 %add
+}
+
+define i32 @incd_mul(i32 %base) {
+; NO_SCALAR_INC-LABEL: incd_mul:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: mov w8, #15
+; NO_SCALAR_INC-NEXT: cntd x9, vl64
+; NO_SCALAR_INC-NEXT: madd w0, w9, w8, w0
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: incd_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: incd x0, vl64, mul #15
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+  %cnt = tail call i64 @llvm.aarch64.sve.cntd(i32 11)
+  %conv = trunc i64 %cnt to i32
+  %mul = mul i32 %conv, 15
+  %add = add i32 %base, %mul
+  ret i32 %add
+}
+
+;
+; DECD
+;
+
+define i32 @decd(i32 %a) {
+; NO_SCALAR_INC-LABEL: decd:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cntd x8, #16
+; NO_SCALAR_INC-NEXT: sub w0, w0, w8
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: decd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: decd x0, #16
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+  %cnt = call i64 @llvm.aarch64.sve.cntd(i32 16)
+  %conv = trunc i64 %cnt to i32
+  %out = sub i32 %a, %conv
+  ret i32 %out
+}
+
+define i32 @decd_mul(i32 %a) {
+; NO_SCALAR_INC-LABEL: decd_mul:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: mov w8, #9
+; NO_SCALAR_INC-NEXT: cntd x9, vl2
+; NO_SCALAR_INC-NEXT: msub w0, w9, w8, w0
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: decd_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: decd x0, vl2, mul #9
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+  %cnt = call i64 @llvm.aarch64.sve.cntd(i32 2)
+  %conv = trunc i64 %cnt to i32
+  %mul = mul i32 %conv, 9
+  %out = sub i32 %a, %mul
+  ret i32 %out
+}
+
+declare i64 @llvm.aarch64.sve.cntb(i32 %pattern)
+declare i64 @llvm.aarch64.sve.cnth(i32 %pattern)
+declare i64 @llvm.aarch64.sve.cntw(i32 %pattern)
+declare i64 @llvm.aarch64.sve.cntd(i32 %pattern)