[AArch64] Add IR intrinsics for the SVE2.1/SME2 predicate-as-counter type.

What this patch does, file by file:
  * Intrinsics.h / Function.cpp: add the IITDescriptor kind AArch64Svcount.
    It is decoded from IIT_AARCH64_SVCOUNT, materialised as
    TargetExtType "aarch64.svcount", and a type is rejected in the
    type-matching switch unless it is a TargetExtType with that exact name.
  * Intrinsics.td: add the IIT_AARCH64_SVCOUNT encoding and
    llvm_aarch64_svcount_ty.
  * IntrinsicsAArch64.td: add llvm.aarch64.sve.ptrue.c{8,16,32,64} and
    llvm.aarch64.sve.cntp.c{8,16,32,64}; the i32 operand of cntp is an
    immediate (ImmArg).
  * SVEInstrFormats.td: attach the ptrue intrinsics to the PTRUE (pn)
    instructions and add cntp patterns mapping the immediates 2 and 4 to
    instruction operands 0 and 1 (printed as vlx2 / vlx4 in the tests).
  * Two llc tests covering the new intrinsics.

NOTE(review): this patch text was recovered from a copy in which line breaks
and angle-bracket argument lists had been lost. Added/removed lines were
rebuilt so each hunk matches its @@ line counts; unchanged context lines
(notably the SVE_4_Op_Imm_Pat and sve2p1_pcount_pn arguments, the PNR*_p8to15
operand names, and column alignment) are presumed -- confirm against the
target tree before applying.

diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -138,6 +138,7 @@
       AMX,
       PPCQuad,
       AnyPtrToElt,
+      AArch64Svcount,
     } Kind;
 
     union {
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -316,6 +316,7 @@
 def IIT_ANYPTR_TO_ELT : IIT_Base<56>;
 def IIT_I2 : IIT_Int<2, 57>;
 def IIT_I4 : IIT_Int<4, 58>;
+def IIT_AARCH64_SVCOUNT : IIT_VT<aarch64svcount, 59>;
 }
 
 defvar IIT_all_FixedTypes = !filter(iit, IIT_all,
@@ -511,6 +512,8 @@
 def llvm_x86mmx_ty : LLVMType<x86mmx>;
 def llvm_ptrx86mmx_ty : LLVMPointerType<llvm_x86mmx_ty>; // <1 x i64>*
 
+def llvm_aarch64_svcount_ty : LLVMType<aarch64svcount>;
+
 def llvm_x86amx_ty : LLVMType<x86amx>;
 
 def llvm_v2i1_ty : LLVMType<v2i1>; // 2 x i1
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2764,6 +2764,33 @@
                                [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
   }
 
+  //
+  // Predicate-as-counter intrinsics
+  //
+
+
+  def int_aarch64_sve_ptrue_c8
+    : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>;
+  def int_aarch64_sve_ptrue_c16
+    : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>;
+  def int_aarch64_sve_ptrue_c32
+    : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>;
+  def int_aarch64_sve_ptrue_c64
+    : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>;
+
+  def int_aarch64_sve_cntp_c8
+    : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_aarch64_svcount_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+  def int_aarch64_sve_cntp_c16
+    : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_aarch64_svcount_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+  def int_aarch64_sve_cntp_c32
+    : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_aarch64_svcount_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+  def int_aarch64_sve_cntp_c64
+    : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_aarch64_svcount_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
   //
   // SME2 Intrinsics
   //
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -1099,6 +1099,9 @@
   case IIT_I4:
     OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 4));
     return;
+  case IIT_AARCH64_SVCOUNT:
+    OutputTable.push_back(IITDescriptor::get(IITDescriptor::AArch64Svcount, 0));
+    return;
   case IIT_I8:
     OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 8));
     return;
@@ -1340,6 +1343,8 @@
   case IITDescriptor::Double: return Type::getDoubleTy(Context);
   case IITDescriptor::Quad: return Type::getFP128Ty(Context);
   case IITDescriptor::PPCQuad: return Type::getPPC_FP128Ty(Context);
+  case IITDescriptor::AArch64Svcount:
+    return TargetExtType::get(Context, "aarch64.svcount");
 
   case IITDescriptor::Integer:
     return IntegerType::get(Context, D.Integer_Width);
@@ -1514,6 +1519,9 @@
     case IITDescriptor::Quad: return !Ty->isFP128Ty();
     case IITDescriptor::PPCQuad: return !Ty->isPPC_FP128Ty();
     case IITDescriptor::Integer: return !Ty->isIntegerTy(D.Integer_Width);
+    case IITDescriptor::AArch64Svcount:
+      return !isa<TargetExtType>(Ty) ||
+             cast<TargetExtType>(Ty)->getName() != "aarch64.svcount";
     case IITDescriptor::Vector: {
       VectorType *VT = dyn_cast<VectorType>(Ty);
       return !VT || VT->getElementCount() != D.Vector_Width ||
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -506,6 +506,12 @@
   : Pat<(vtd (op vt1:$Op1, (vt2 ImmTy:$Op2))),
         (inst $Op1, ImmTy:$Op2)>;
 
+multiclass SVE2p1_Cntp_Pat<ValueType vtd, SDPatternOperator op,
+                           ValueType vt1, Instruction inst> {
+  def : Pat<(vtd (op vt1:$Op1, (i32 2))), (inst $Op1, 0)>;
+  def : Pat<(vtd (op vt1:$Op1, (i32 4))), (inst $Op1, 1)>;
+}
+
 class SVE_3_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
                        ValueType vt2, ValueType vt3, Operand ImmTy,
                        Instruction inst>
@@ -9158,9 +9164,9 @@
   def : SVE_4_Op_Imm_Pat<nxv4i32, intrinsic, nxv4i32, nxv8i16, nxv8i16, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
 }
 
-class sve2p1_ptrue_pn<string mnemonic, bits<2> sz, PNRP8to15RegOp pnrty>
+class sve2p1_ptrue_pn<string mnemonic, bits<2> sz, PNRP8to15RegOp pnrty, SDPatternOperator op>
     : I<(outs pnrty:$PNd), (ins ), mnemonic, "\t$PNd",
-        "", []>, Sched<[]> {
+        "", [(set pnrty:$PNd, (op))]>, Sched<[]> {
   bits<3> PNd;
   let Inst{31-24} = 0b00100101;
   let Inst{23-22} = sz;
@@ -9172,10 +9178,10 @@
 
 
 multiclass sve2p1_ptrue_pn<string mnemonic> {
- def _B : sve2p1_ptrue_pn<mnemonic, 0b00, PNR8_p8to15>;
- def _H : sve2p1_ptrue_pn<mnemonic, 0b01, PNR16_p8to15>;
- def _S : sve2p1_ptrue_pn<mnemonic, 0b10, PNR32_p8to15>;
- def _D : sve2p1_ptrue_pn<mnemonic, 0b11, PNR64_p8to15>;
+ def _B : sve2p1_ptrue_pn<mnemonic, 0b00, PNR8_p8to15, int_aarch64_sve_ptrue_c8>;
+ def _H : sve2p1_ptrue_pn<mnemonic, 0b01, PNR16_p8to15, int_aarch64_sve_ptrue_c16>;
+ def _S : sve2p1_ptrue_pn<mnemonic, 0b10, PNR32_p8to15, int_aarch64_sve_ptrue_c32>;
+ def _D : sve2p1_ptrue_pn<mnemonic, 0b11, PNR64_p8to15, int_aarch64_sve_ptrue_c64>;
 }
 
 
@@ -9543,6 +9549,11 @@
   def _H : sve2p1_pcount_pn<mnemonic, 0b01>;
   def _S : sve2p1_pcount_pn<mnemonic, 0b10>;
   def _D : sve2p1_pcount_pn<mnemonic, 0b11>;
+
+  defm : SVE2p1_Cntp_Pat<i64, int_aarch64_sve_cntp_c8, aarch64svcount, !cast<Instruction>(NAME # _B)>;
+  defm : SVE2p1_Cntp_Pat<i64, int_aarch64_sve_cntp_c16, aarch64svcount, !cast<Instruction>(NAME # _H)>;
+  defm : SVE2p1_Cntp_Pat<i64, int_aarch64_sve_cntp_c32, aarch64svcount, !cast<Instruction>(NAME # _S)>;
+  defm : SVE2p1_Cntp_Pat<i64, int_aarch64_sve_cntp_c64, aarch64svcount, !cast<Instruction>(NAME # _D)>;
 }
 
 
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-cntp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-cntp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-cntp.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 < %s | FileCheck %s
+
+define i64 @test_svcntp_c8_vlx2(target("aarch64.svcount") %pn) nounwind {
+; CHECK-LABEL: test_svcntp_c8_vlx2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x0, pn0.b, vlx2
+; CHECK-NEXT:    ret
+  %res = call i64 @llvm.aarch64.sve.cntp.c8(target("aarch64.svcount") %pn, i32 2)
+  ret i64 %res
+}
+
+define i64 @test_svcntp_c8_vlx4(target("aarch64.svcount") %pn) nounwind {
+; CHECK-LABEL: test_svcntp_c8_vlx4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x0, pn0.b, vlx4
+; CHECK-NEXT:    ret
+  %res = call i64 @llvm.aarch64.sve.cntp.c8(target("aarch64.svcount") %pn, i32 4)
+  ret i64 %res
+}
+
+define i64 @test_svcntp_c16_vlx2(target("aarch64.svcount") %pn) nounwind {
+; CHECK-LABEL: test_svcntp_c16_vlx2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x0, pn0.h, vlx2
+; CHECK-NEXT:    ret
+  %res = call i64 @llvm.aarch64.sve.cntp.c16(target("aarch64.svcount") %pn, i32 2)
+  ret i64 %res
+}
+
+define i64 @test_svcntp_c16_vlx4(target("aarch64.svcount") %pn) nounwind {
+; CHECK-LABEL: test_svcntp_c16_vlx4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x0, pn0.h, vlx4
+; CHECK-NEXT:    ret
+  %res = call i64 @llvm.aarch64.sve.cntp.c16(target("aarch64.svcount") %pn, i32 4)
+  ret i64 %res
+}
+
+define i64 @test_svcntp_c32_vlx2(target("aarch64.svcount") %pn) nounwind {
+; CHECK-LABEL: test_svcntp_c32_vlx2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x0, pn0.s, vlx2
+; CHECK-NEXT:    ret
+  %res = call i64 @llvm.aarch64.sve.cntp.c32(target("aarch64.svcount") %pn, i32 2)
+  ret i64 %res
+}
+
+define i64 @test_svcntp_c32_vlx4(target("aarch64.svcount") %pn) nounwind {
+; CHECK-LABEL: test_svcntp_c32_vlx4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x0, pn0.s, vlx4
+; CHECK-NEXT:    ret
+  %res = call i64 @llvm.aarch64.sve.cntp.c32(target("aarch64.svcount") %pn, i32 4)
+  ret i64 %res
+}
+
+define i64 @test_svcntp_c64_vlx2(target("aarch64.svcount") %pn) nounwind {
+; CHECK-LABEL: test_svcntp_c64_vlx2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x0, pn0.d, vlx2
+; CHECK-NEXT:    ret
+  %res = call i64 @llvm.aarch64.sve.cntp.c64(target("aarch64.svcount") %pn, i32 2)
+  ret i64 %res
+}
+
+define i64 @test_svcntp_c64_vlx4(target("aarch64.svcount") %pn) nounwind {
+; CHECK-LABEL: test_svcntp_c64_vlx4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x0, pn0.d, vlx4
+; CHECK-NEXT:    ret
+  %res = call i64 @llvm.aarch64.sve.cntp.c64(target("aarch64.svcount") %pn, i32 4)
+  ret i64 %res
+}
+
+
+declare i64 @llvm.aarch64.sve.cntp.c8(target("aarch64.svcount"), i32)
+declare i64 @llvm.aarch64.sve.cntp.c16(target("aarch64.svcount"), i32)
+declare i64 @llvm.aarch64.sve.cntp.c32(target("aarch64.svcount"), i32)
+declare i64 @llvm.aarch64.sve.cntp.c64(target("aarch64.svcount"), i32)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 -mattr=+sve2p1 < %s | FileCheck %s
+
+define target("aarch64.svcount") @ptrue_b() nounwind {
+; CHECK-LABEL: ptrue_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue pn8.b
+; CHECK-NEXT:    mov p0.b, p8.b
+; CHECK-NEXT:    ret
+  %res = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+  ret target("aarch64.svcount") %res
+}
+
+define target("aarch64.svcount") @ptrue_h() nounwind {
+; CHECK-LABEL: ptrue_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue pn8.h
+; CHECK-NEXT:    mov p0.b, p8.b
+; CHECK-NEXT:    ret
+  %res = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c16()
+  ret target("aarch64.svcount") %res
+}
+
+define target("aarch64.svcount") @ptrue_s() nounwind {
+; CHECK-LABEL: ptrue_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue pn8.s
+; CHECK-NEXT:    mov p0.b, p8.b
+; CHECK-NEXT:    ret
+  %res = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c32()
+  ret target("aarch64.svcount") %res
+}
+
+define target("aarch64.svcount") @ptrue_d() nounwind {
+; CHECK-LABEL: ptrue_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue pn8.d
+; CHECK-NEXT:    mov p0.b, p8.b
+; CHECK-NEXT:    ret
+  %res = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c64()
+  ret target("aarch64.svcount") %res
+}
+
+declare target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+declare target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c16()
+declare target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c32()
+declare target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c64()