diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1865,10 +1865,21 @@ def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [], [ImmCheck<1, ImmCheck0_3>]>; def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [], [ImmCheck<1, ImmCheck0_1>]>; + +def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8", "}}Pm", "Pc", MergeNone, "", [], []>; +def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, "", [], []>; +def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, "", [], []>; +def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, "", [], []>; } let TargetGuard = "sve2p1" in { def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sclamp", [], []>; def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [], []>; + +def SVPSEL_B : SInst<"svpsel_lane_b8", "PPPm", "Pc", MergeNone, "", [], []>; +def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [], []>; +def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [], []>; +def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [], []>; + def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>; } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10007,7 +10007,33 @@ switch (BuiltinID) { default: return nullptr; - + case SVE::BI__builtin_sve_svpsel_lane_b8: + case SVE::BI__builtin_sve_svpsel_lane_b16: + case SVE::BI__builtin_sve_svpsel_lane_b32: + case SVE::BI__builtin_sve_svpsel_lane_b64: + case SVE::BI__builtin_sve_svpsel_lane_c8: + case SVE::BI__builtin_sve_svpsel_lane_c16: + case SVE::BI__builtin_sve_svpsel_lane_c32: + case SVE::BI__builtin_sve_svpsel_lane_c64: { + bool IsSVCount = isa(Ops[0]->getType()); + assert(((!IsSVCount || cast(Ops[0]->getType())->getName() == + "aarch64.svcount")) && + "Unexpected TargetExtType"); + auto SVCountTy = + llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount"); + Function *CastFromSVCountF = + CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy); + Function *CastToSVCountF = + CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy); + + auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier)); + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy); + llvm::Value *Ops0 = + IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0]; + llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy); + llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]}); + return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel; + } case SVE::BI__builtin_sve_svmov_b_z: { // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op) SVETypeFlags TypeFlags(Builtin->TypeModifier); diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c @@ -0,0 +1,165 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +// CHECK-LABEL: @test_svpsel_lane_b8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 15 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.psel.nxv16i1( [[P1:%.*]], [[P2:%.*]], i32 [[ADD]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svpsel_lane_b8u10__SVBool_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 15 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.psel.nxv16i1( [[P1:%.*]], [[P2:%.*]], i32 [[ADD]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbool_t test_svpsel_lane_b8(svbool_t p1, svbool_t p2, uint32_t idx) { + return svpsel_lane_b8(p1, p2, idx + 15); +} + +// CHECK-LABEL: @test_svpsel_lane_b16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.psel.nxv8i1( [[P1:%.*]], [[TMP0]], i32 [[ADD]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_b16u10__SVBool_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.psel.nxv8i1( [[P1:%.*]], [[TMP0]], i32 [[ADD]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpsel_lane_b16(svbool_t p1, svbool_t p2, uint32_t idx) { + return svpsel_lane_b16(p1, p2, idx + 7); +} + +// CHECK-LABEL: @test_svpsel_lane_b32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[P2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.psel.nxv4i1( [[P1:%.*]], [[TMP0]], i32 [[ADD]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_b32u10__SVBool_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[P2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.psel.nxv4i1( [[P1:%.*]], [[TMP0]], i32 [[ADD]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpsel_lane_b32(svbool_t p1, svbool_t p2, uint32_t idx) { + return svpsel_lane_b32(p1, p2, idx + 3); +} + +// CHECK-LABEL: @test_svpsel_lane_b64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[P2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.psel.nxv2i1( [[P1:%.*]], [[TMP0]], i32 [[ADD]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_b64u10__SVBool_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[P2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.psel.nxv2i1( [[P1:%.*]], [[TMP0]], i32 [[ADD]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpsel_lane_b64(svbool_t p1, svbool_t p2, uint32_t idx) { + return svpsel_lane_b64(p1, p2, idx + 1); +} + +// CHECK-LABEL: @test_svpsel_lane_c8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 15 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.psel.nxv16i1( [[TMP0]], [[P2:%.*]], i32 [[ADD]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( [[TMP1]]) +// CHECK-NEXT: ret target("aarch64.svcount") [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z19test_svpsel_lane_c8u11__SVCount_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 15 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.psel.nxv16i1( [[TMP0]], [[P2:%.*]], i32 [[ADD]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( [[TMP1]]) +// CPP-CHECK-NEXT: ret target("aarch64.svcount") [[TMP2]] +// +svcount_t test_svpsel_lane_c8(svcount_t p1, svbool_t p2, uint32_t idx) { + return svpsel_lane_c8(p1, p2, idx + 15); +} + +// CHECK-LABEL: @test_svpsel_lane_c16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P2:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.psel.nxv8i1( [[TMP0]], [[TMP1]], i32 [[ADD]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( [[TMP2]]) +// CHECK-NEXT: ret target("aarch64.svcount") [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_c16u11__SVCount_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P2:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.psel.nxv8i1( [[TMP0]], [[TMP1]], i32 [[ADD]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( [[TMP2]]) +// CPP-CHECK-NEXT: ret target("aarch64.svcount") [[TMP3]] +// +svcount_t test_svpsel_lane_c16(svcount_t p1, svbool_t p2, uint32_t idx) { + return svpsel_lane_c16(p1, p2, idx + 7); +} + +// CHECK-LABEL: @test_svpsel_lane_c32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[P2:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.psel.nxv4i1( [[TMP0]], [[TMP1]], i32 [[ADD]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( [[TMP2]]) +// CHECK-NEXT: ret target("aarch64.svcount") [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_c32u11__SVCount_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[P2:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.psel.nxv4i1( [[TMP0]], [[TMP1]], i32 [[ADD]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( [[TMP2]]) +// CPP-CHECK-NEXT: ret target("aarch64.svcount") [[TMP3]] +// +svcount_t test_svpsel_lane_c32(svcount_t p1, svbool_t p2, uint32_t idx) { + return svpsel_lane_c32(p1, p2, idx + 3); +} + +// CHECK-LABEL: @test_svpsel_lane_c64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[P2:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.psel.nxv2i1( [[TMP0]], [[TMP1]], i32 [[ADD]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( [[TMP2]]) +// CHECK-NEXT: ret target("aarch64.svcount") [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_c64u11__SVCount_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[P2:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.psel.nxv2i1( [[TMP0]], [[TMP1]], i32 [[ADD]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( [[TMP2]]) +// CPP-CHECK-NEXT: ret target("aarch64.svcount") [[TMP3]] +// +svcount_t test_svpsel_lane_c64(svcount_t p1, svbool_t p2, uint32_t idx) { + return svpsel_lane_c64(p1, p2, idx + 1); +}