diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -281,6 +281,9 @@ bool isTupleCreate() const { return Flags & IsTupleCreate; } bool isTupleGet() const { return Flags & IsTupleGet; } bool isTupleSet() const { return Flags & IsTupleSet; } + bool isSME() const { return Flags & IsSME; } + bool isSMELd1() const { return Flags & IsSMELd1; } + bool isSMESt1() const { return Flags & IsSMESt1; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; } diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -98,6 +98,7 @@ // N: svfloat64_t // J: Prefetch type (sv_prfop) +// %: pointer to void // A: pointer to int8_t // B: pointer to int16_t // C: pointer to int32_t @@ -205,6 +206,9 @@ def IsTupleCreate : FlagType<0x100000000>; def IsTupleGet : FlagType<0x200000000>; def IsTupleSet : FlagType<0x400000000>; +def IsSME : FlagType<0x800000000>; +def IsSMELd1 : FlagType<0x1000000000>; +def IsSMESt1 : FlagType<0x2000000000>; // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h class ImmCheckType { @@ -542,6 +546,28 @@ def SVBFMLALT_LANE : SInst<"svbfmlalt_lane[_{0}]", "MMddn", "b", MergeNone, "aarch64_sve_bfmlalt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; } +def SVLD1_HOR_ZA8 : MInst<"svld1_hor_za8", "vimiPQ", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1b_horiz">; +def SVLD1_HOR_ZA16 : MInst<"svld1_hor_za16", "vimiPQ", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1h_horiz">; +def SVLD1_HOR_ZA32 : MInst<"svld1_hor_za32", "vimiPQ", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1w_horiz">; +def SVLD1_HOR_ZA64 : MInst<"svld1_hor_za64", "vimiPQ", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1d_horiz">; +def SVLD1_HOR_ZA128 : MInst<"svld1_hor_za128", "vimiPQ", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1q_horiz">; +def SVLD1_VER_ZA8 : MInst<"svld1_ver_za8", "vimiPQ", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1b_vert">; +def SVLD1_VER_ZA16 : MInst<"svld1_ver_za16", "vimiPQ", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1h_vert">; +def SVLD1_VER_ZA32 : MInst<"svld1_ver_za32", "vimiPQ", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1w_vert">; +def SVLD1_VER_ZA64 : MInst<"svld1_ver_za64", "vimiPQ", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1d_vert">; +def SVLD1_VER_ZA128 : MInst<"svld1_ver_za128", "vimiPQ", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1q_vert">; + +def SVLD1_HOR_VNUM_ZA8 : MInst<"svld1_hor_vnum_za8", "vimiPQl", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1b_horiz">; +def SVLD1_HOR_VNUM_ZA16 : MInst<"svld1_hor_vnum_za16", "vimiPQl", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1h_horiz">; +def SVLD1_HOR_VNUM_ZA32 : MInst<"svld1_hor_vnum_za32", "vimiPQl", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1w_horiz">; +def SVLD1_HOR_VNUM_ZA64 : MInst<"svld1_hor_vnum_za64", "vimiPQl", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1d_horiz">; +def SVLD1_HOR_VNUM_ZA128 : MInst<"svld1_hor_vnum_za128", "vimiPQl", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1q_horiz">; +def SVLD1_VER_VNUM_ZA8 : MInst<"svld1_ver_vnum_za8", "vimiPQl", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1b_vert">; +def SVLD1_VER_VNUM_ZA16 : MInst<"svld1_ver_vnum_za16", "vimiPQl", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1h_vert">; +def SVLD1_VER_VNUM_ZA32 : MInst<"svld1_ver_vnum_za32", "vimiPQl", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1w_vert">; +def SVLD1_VER_VNUM_ZA64 : MInst<"svld1_ver_vnum_za64", "vimiPQl", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1d_vert">; +def SVLD1_VER_VNUM_ZA128 : MInst<"svld1_ver_vnum_za128", "vimiPQl", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1q_vert">; + //////////////////////////////////////////////////////////////////////////////// // Stores @@ -664,6 +690,28 @@ def SVSTNT1_VNUM_BF : MInst<"svstnt1_vnum[_{d}]", "vPpld", "b", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">; } +def SVST1_HOR_ZA8 : MInst<"svst1_hor_za8", "vimiP%", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1b_horiz">; +def SVST1_HOR_ZA16 : MInst<"svst1_hor_za16", "vimiP%", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1h_horiz">; +def SVST1_HOR_ZA32 : MInst<"svst1_hor_za32", "vimiP%", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1w_horiz">; +def SVST1_HOR_ZA64 : MInst<"svst1_hor_za64", "vimiP%", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1d_horiz">; +def SVST1_HOR_ZA128 : MInst<"svst1_hor_za128", "vimiP%", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1q_horiz">; +def SVST1_VER_ZA8 : MInst<"svst1_ver_za8", "vimiP%", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1b_vert">; +def SVST1_VER_ZA16 : MInst<"svst1_ver_za16", "vimiP%", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1h_vert">; +def SVST1_VER_ZA32 : MInst<"svst1_ver_za32", "vimiP%", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1w_vert">; +def SVST1_VER_ZA64 : MInst<"svst1_ver_za64", "vimiP%", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1d_vert">; +def SVST1_VER_ZA128 : MInst<"svst1_ver_za128", "vimiP%", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1q_vert">; + +def SVST1_HOR_VNUM_ZA8 : MInst<"svst1_hor_vnum_za8", "vimiP%l", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1b_horiz">; +def SVST1_HOR_VNUM_ZA16 : MInst<"svst1_hor_vnum_za16", "vimiP%l", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1h_horiz">; +def SVST1_HOR_VNUM_ZA32 : MInst<"svst1_hor_vnum_za32", "vimiP%l", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1w_horiz">; +def SVST1_HOR_VNUM_ZA64 : MInst<"svst1_hor_vnum_za64", "vimiP%l", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1d_horiz">; +def SVST1_HOR_VNUM_ZA128 : MInst<"svst1_hor_vnum_za128", "vimiP%l", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1q_horiz">; +def SVST1_VER_VNUM_ZA8 : MInst<"svst1_ver_vnum_za8", "vimiP%l", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1b_vert">; +def SVST1_VER_VNUM_ZA16 : MInst<"svst1_ver_vnum_za16", "vimiP%l", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1h_vert">; +def SVST1_VER_VNUM_ZA32 : MInst<"svst1_ver_vnum_za32", "vimiP%l", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1w_vert">; +def SVST1_VER_VNUM_ZA64 : MInst<"svst1_ver_vnum_za64", "vimiP%l", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1d_vert">; +def SVST1_VER_VNUM_ZA128 : MInst<"svst1_ver_vnum_za128", "vimiP%l", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1q_vert">; + //////////////////////////////////////////////////////////////////////////////// // Prefetches diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -8674,6 +8674,7 @@ switch (VTy->getMinNumElements()) { default: llvm_unreachable("unsupported element count!"); + case 1: case 2: case 4: case 8: @@ -9027,6 +9028,75 @@ return Store; } +Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) { + llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int32Ty, false); + return Builder.CreateAdd(Base, CastOffset, "tileslice"); +} + +Value *CodeGenFunction::EmitSMELd1St1(SVETypeFlags TypeFlags, + SmallVectorImpl &Ops, + unsigned IntID) { + llvm::Type *BasePtrType; + switch (IntID) { + case Intrinsic::aarch64_sme_ld1h_horiz: + case Intrinsic::aarch64_sme_ld1h_vert: + case Intrinsic::aarch64_sme_st1h_horiz: + case Intrinsic::aarch64_sme_st1h_vert: + BasePtrType = Int16Ty; + Ops[3] = EmitSVEPredicateCast( + Ops[3], llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8)); + break; + case Intrinsic::aarch64_sme_ld1w_horiz: + case Intrinsic::aarch64_sme_ld1w_vert: + case Intrinsic::aarch64_sme_st1w_horiz: + case Intrinsic::aarch64_sme_st1w_vert: + BasePtrType = Int32Ty; + Ops[3] = EmitSVEPredicateCast( + Ops[3], llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4)); + break; + case Intrinsic::aarch64_sme_ld1d_horiz: + case Intrinsic::aarch64_sme_ld1d_vert: + case Intrinsic::aarch64_sme_st1d_horiz: + case Intrinsic::aarch64_sme_st1d_vert: + BasePtrType = Int64Ty; + Ops[3] = EmitSVEPredicateCast( + Ops[3], llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2)); + break; + case Intrinsic::aarch64_sme_ld1q_horiz: + case Intrinsic::aarch64_sme_ld1q_vert: + case Intrinsic::aarch64_sme_st1q_horiz: + case Intrinsic::aarch64_sme_st1q_vert: + BasePtrType = llvm::IntegerType::get(getLLVMContext(), 128); + Ops[3] = EmitSVEPredicateCast( + Ops[3], llvm::ScalableVectorType::get(Builder.getInt1Ty(), 1)); + break; + default: + BasePtrType = Int8Ty; + break; + } + + SmallVector NewOps; + NewOps.push_back(Ops[3]); + + llvm::Value *BasePtr = Ops[4]; + + // Contains the vnum parameter + if (Ops.size() == 6) { + Function *StreamingVectorLength = + CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb, {}); + llvm::Value *StreamingVectorLengthCall = + Builder.CreateCall(StreamingVectorLength, {}); + llvm::Value *Mulvl = + Builder.CreateMul(StreamingVectorLengthCall, Ops[5], "mulvl"); + BasePtr = Builder.CreateGEP(BasePtrType, Ops[4], Mulvl); + } + NewOps.push_back(BasePtr); + NewOps.push_back(Builder.CreateIntCast(Ops[0], Int64Ty, false)); + NewOps.push_back(EmitTileslice(Ops[2], Ops[1])); + Function *F = CGM.getIntrinsic(IntID, {}); + return Builder.CreateCall(F, NewOps); +} + // Limit the usage of scalable llvm IR generated by the ACLE by using the // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat. Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) { @@ -9157,6 +9227,8 @@ TypeFlags.isZExtReturn()); else if (TypeFlags.isStore()) return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isSMELd1() || TypeFlags.isSMESt1()) + return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isGatherLoad()) return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isScatterStore()) diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4230,6 +4230,10 @@ llvm::Value *EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl &Ops, unsigned BuiltinID); + llvm::Value *EmitTileslice(llvm::Value *Offset, llvm::Value *Base); + llvm::Value *EmitSMELd1St1(SVETypeFlags TypeFlags, + llvm::SmallVectorImpl &Ops, + unsigned IntID); llvm::Value *EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned BuiltinID); diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -308,6 +308,8 @@ clang_generate_header(-gen-arm-fp16 arm_fp16.td arm_fp16.h) # Generate arm_sve.h clang_generate_header(-gen-arm-sve-header arm_sve.td arm_sve.h) + # Generate arm_sme.h + clang_generate_header(-gen-arm-sme-header arm_sve.td arm_sme.h) # Generate arm_bf16.h clang_generate_header(-gen-arm-bf16 arm_bf16.td arm_bf16.h) # Generate arm_mve.h @@ -332,6 +334,7 @@ list(APPEND aarch64_only_generated_files "${CMAKE_CURRENT_BINARY_DIR}/arm_sve.h" + "${CMAKE_CURRENT_BINARY_DIR}/arm_sme.h" "${CMAKE_CURRENT_BINARY_DIR}/arm_bf16.h" "${output_dir}/arm_neon_sve_bridge.h" ) diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c @@ -0,0 +1,209 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o /dev/null %s + +#include + +// CHECK-LABEL: @test_svld1_hor_za8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG:%.*]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], ptr [[PTR]], i64 0, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z18test_svld1_hor_za8ju10__SVBool_tPKv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG:%.*]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], ptr [[PTR]], i64 0, i32 [[TILESLICE1]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_hor_za8(0, slice_base, 0, pg, ptr); + svld1_hor_za8(0, slice_base, 15, pg, ptr); +} + +// CHECK-LABEL: @test_svld1_hor_za16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[PTR]], i64 1, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svld1_hor_za16ju10__SVBool_tPKv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[PTR]], i64 1, i32 [[TILESLICE1]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_hor_za16(0, slice_base, 0, pg, ptr); + svld1_hor_za16(1, slice_base, 7, pg, ptr); +} + +// CHECK-LABEL: @test_svld1_hor_za32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[PTR]], i64 3, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svld1_hor_za32ju10__SVBool_tPKv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[PTR]], i64 3, i32 [[TILESLICE1]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_hor_za32(0, slice_base, 0, pg, ptr); + svld1_hor_za32(3, slice_base, 3, pg, ptr); +} + +// CHECK-LABEL: @test_svld1_hor_za64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[PTR]], i64 7, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svld1_hor_za64ju10__SVBool_tPKv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[PTR]], i64 7, i32 [[TILESLICE1]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_hor_za64(0, slice_base, 0, pg, ptr); + svld1_hor_za64(7, slice_base, 1, pg, ptr); +} + +// CHECK-LABEL: @test_svld1_hor_za128( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[PTR]], i64 15, i32 [[SLICE_BASE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svld1_hor_za128ju10__SVBool_tPKv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[PTR]], i64 15, i32 [[SLICE_BASE]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_hor_za128(0, slice_base, 0, pg, ptr); + svld1_hor_za128(15, slice_base, 0, pg, ptr); +} + +// CHECK-LABEL: @test_svld1_ver_za8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG:%.*]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], ptr [[PTR]], i64 0, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z18test_svld1_ver_za8ju10__SVBool_tPKv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG:%.*]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], ptr [[PTR]], i64 0, i32 [[TILESLICE1]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_ver_za8(0, slice_base, 0, pg, ptr); + svld1_ver_za8(0, slice_base, 15, pg, ptr); +} + +// CHECK-LABEL: @test_svld1_ver_za16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[PTR]], i64 1, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svld1_ver_za16ju10__SVBool_tPKv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[PTR]], i64 1, i32 [[TILESLICE1]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_ver_za16(0, slice_base, 0, pg, ptr); + svld1_ver_za16(1, slice_base, 7, pg, ptr); +} + +// CHECK-LABEL: @test_svld1_ver_za32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[PTR]], i64 3, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svld1_ver_za32ju10__SVBool_tPKv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[PTR]], i64 3, i32 [[TILESLICE1]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_ver_za32(0, slice_base, 0, pg, ptr); + svld1_ver_za32(3, slice_base, 3, pg, ptr); +} + +// CHECK-LABEL: @test_svld1_ver_za64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[PTR]], i64 7, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svld1_ver_za64ju10__SVBool_tPKv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[PTR]], i64 7, i32 [[TILESLICE1]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_ver_za64(0, slice_base, 0, pg, ptr); + svld1_ver_za64(7, slice_base, 1, pg, ptr); +} + +// CHECK-LABEL: @test_svld1_ver_za128( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[PTR]], i64 15, i32 [[SLICE_BASE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svld1_ver_za128ju10__SVBool_tPKv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[PTR]], i64 15, i32 [[SLICE_BASE]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_ver_za128(0, slice_base, 0, pg, ptr); + svld1_ver_za128(15, slice_base, 0, pg, ptr); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c @@ -0,0 +1,269 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o /dev/null %s + +#include + +// CHECK-LABEL: @test_svld1_hor_vnum_za8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG:%.*]], ptr [[TMP1]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], ptr [[TMP1]], i64 0, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svld1_hor_vnum_za8ju10__SVBool_tPKvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG:%.*]], ptr [[TMP1]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], ptr [[TMP1]], i64 0, i32 [[TILESLICE2]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_hor_vnum_za8(0, slice_base, 0, pg, ptr, vnum); + svld1_hor_vnum_za8(0, slice_base, 15, pg, ptr, vnum); +} + +// CHECK-LABEL: @test_svld1_hor_vnum_za16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[TMP2]], i64 1, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svld1_hor_vnum_za16ju10__SVBool_tPKvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[TMP2]], i64 1, i32 [[TILESLICE2]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_hor_vnum_za16(0, slice_base, 0, pg, ptr, vnum); + svld1_hor_vnum_za16(1, slice_base, 7, pg, ptr, vnum); +} + +// CHECK-LABEL: @test_svld1_hor_vnum_za32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[TMP2]], i64 3, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svld1_hor_vnum_za32ju10__SVBool_tPKvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[TMP2]], i64 3, i32 [[TILESLICE2]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_hor_vnum_za32(0, slice_base, 0, pg, ptr, vnum); + svld1_hor_vnum_za32(3, slice_base, 3, pg, ptr, vnum); +} + +// CHECK-LABEL: @test_svld1_hor_vnum_za64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[TMP2]], i64 7, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svld1_hor_vnum_za64ju10__SVBool_tPKvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[TMP2]], i64 7, i32 [[TILESLICE2]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_hor_vnum_za64(0, slice_base, 0, pg, ptr, vnum); + svld1_hor_vnum_za64(7, slice_base, 1, pg, ptr, vnum); +} + +// CHECK-LABEL: @test_svld1_hor_vnum_za128( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i128, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[TMP2]], i64 15, i32 [[SLICE_BASE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svld1_hor_vnum_za128ju10__SVBool_tPKvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i128, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[TMP2]], i64 15, i32 [[SLICE_BASE]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_hor_vnum_za128(0, slice_base, 0, pg, ptr, vnum); + svld1_hor_vnum_za128(15, slice_base, 0, pg, ptr, vnum); +} + +// CHECK-LABEL: @test_svld1_ver_hor_za8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG:%.*]], ptr [[TMP1]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], ptr [[TMP1]], i64 0, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svld1_ver_hor_za8ju10__SVBool_tPKvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG:%.*]], ptr [[TMP1]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], ptr [[TMP1]], i64 0, i32 [[TILESLICE2]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_ver_vnum_za8(0, slice_base, 0, pg, ptr, vnum); + svld1_ver_vnum_za8(0, slice_base, 15, pg, ptr, vnum); +} + +// CHECK-LABEL: @test_svld1_ver_vnum_za16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[TMP2]], i64 1, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svld1_ver_vnum_za16ju10__SVBool_tPKvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[TMP2]], i64 1, i32 [[TILESLICE2]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_ver_vnum_za16(0, slice_base, 0, pg, ptr, vnum); + svld1_ver_vnum_za16(1, slice_base, 7, pg, ptr, vnum); +} + +// CHECK-LABEL: @test_svld1_ver_vnum_za32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[TMP2]], i64 3, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svld1_ver_vnum_za32ju10__SVBool_tPKvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[TMP2]], i64 3, i32 [[TILESLICE2]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_ver_vnum_za32(0, slice_base, 0, pg, ptr, vnum); + svld1_ver_vnum_za32(3, slice_base, 3, pg, ptr, vnum); +} + +// CHECK-LABEL: @test_svld1_ver_vnum_za64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[TMP2]], i64 7, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svld1_ver_vnum_za64ju10__SVBool_tPKvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[TMP2]], i64 7, i32 [[TILESLICE2]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_ver_vnum_za64(0, slice_base, 0, pg, ptr, vnum); + svld1_ver_vnum_za64(7, slice_base, 1, pg, ptr, vnum); +} + +// CHECK-LABEL: @test_svld1_ver_vnum_za128( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i128, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[TMP2]], i64 15, i32 [[SLICE_BASE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svld1_ver_vnum_za128ju10__SVBool_tPKvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i128, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[TMP2]], i64 15, i32 [[SLICE_BASE]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_ver_vnum_za128(0, slice_base, 0, pg, ptr, vnum); + svld1_ver_vnum_za128(15, slice_base, 0, pg, ptr, vnum); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c @@ -0,0 +1,209 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o /dev/null %s + +#include + +// CHECK-LABEL: @test_svst1_hor_za8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG:%.*]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG]], ptr [[PTR]], i64 0, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z18test_svst1_hor_za8ju10__SVBool_tPv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG:%.*]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG]], ptr [[PTR]], i64 0, i32 [[TILESLICE1]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_hor_za8(0, slice_base, 0, pg, ptr); + svst1_hor_za8(0, slice_base, 15, pg, ptr); +} + +// CHECK-LABEL: @test_svst1_hor_za16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[PTR]], i64 1, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst1_hor_za16ju10__SVBool_tPv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[PTR]], i64 1, i32 [[TILESLICE1]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_hor_za16(0, slice_base, 0, pg, ptr); + svst1_hor_za16(1, slice_base, 7, pg, ptr); +} + +// CHECK-LABEL: @test_svst1_hor_za32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[PTR]], i64 3, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst1_hor_za32ju10__SVBool_tPv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[PTR]], i64 3, i32 [[TILESLICE1]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_hor_za32(0, slice_base, 0, pg, ptr); + svst1_hor_za32(3, slice_base, 3, pg, ptr); +} + +// CHECK-LABEL: @test_svst1_hor_za64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[PTR]], i64 7, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst1_hor_za64ju10__SVBool_tPv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[PTR]], i64 7, i32 [[TILESLICE1]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_hor_za64(0, slice_base, 0, pg, ptr); + svst1_hor_za64(7, slice_base, 1, pg, ptr); +} + +// CHECK-LABEL: @test_svst1_hor_za128( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[PTR]], i64 15, i32 [[SLICE_BASE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst1_hor_za128ju10__SVBool_tPv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[PTR]], i64 15, i32 [[SLICE_BASE]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_hor_za128(0, slice_base, 0, pg, ptr); + svst1_hor_za128(15, slice_base, 0, pg, ptr); +} + +// CHECK-LABEL: @test_svst1_ver_za8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG:%.*]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], ptr [[PTR]], i64 0, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z18test_svst1_ver_za8ju10__SVBool_tPv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG:%.*]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], ptr [[PTR]], i64 0, i32 [[TILESLICE1]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_ver_za8(0, slice_base, 0, pg, ptr); + svst1_ver_za8(0, slice_base, 15, pg, ptr); +} + +// CHECK-LABEL: @test_svst1_ver_za16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[PTR]], i64 1, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst1_ver_za16ju10__SVBool_tPv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[PTR]], i64 1, i32 [[TILESLICE1]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_ver_za16(0, slice_base, 0, pg, ptr); + svst1_ver_za16(1, slice_base, 7, pg, ptr); +} + +// CHECK-LABEL: @test_svst1_ver_za32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[PTR]], i64 3, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst1_ver_za32ju10__SVBool_tPv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[PTR]], i64 3, i32 [[TILESLICE1]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_ver_za32(0, slice_base, 0, pg, ptr); + svst1_ver_za32(3, slice_base, 3, pg, ptr); +} + +// CHECK-LABEL: @test_svst1_ver_za64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[PTR]], i64 7, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst1_ver_za64ju10__SVBool_tPv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[PTR]], i64 7, i32 [[TILESLICE1]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_ver_za64(0, slice_base, 0, pg, ptr); + svst1_ver_za64(7, slice_base, 1, pg, ptr); +} + +// CHECK-LABEL: @test_svst1_ver_za128( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[PTR]], i64 15, i32 [[SLICE_BASE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst1_ver_za128ju10__SVBool_tPv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[PTR:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[PTR]], i64 15, i32 [[SLICE_BASE]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_ver_za128(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_ver_za128(0, slice_base, 0, pg, ptr); + svst1_ver_za128(15, slice_base, 0, pg, ptr); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c @@ -0,0 +1,299 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o /dev/null %s + +#include + +// +// + +// CHECK-LABEL: @test_svst1_hor_vnum_za8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG:%.*]], ptr [[TMP1]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG]], ptr [[TMP1]], i64 0, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svst1_hor_vnum_za8ju10__SVBool_tPvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG:%.*]], ptr [[TMP1]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG]], ptr [[TMP1]], i64 0, i32 [[TILESLICE2]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_hor_vnum_za8(0, slice_base, 0, pg, ptr, vnum); + svst1_hor_vnum_za8(0, slice_base, 15, pg, ptr, vnum); +} + +// +// + +// CHECK-LABEL: @test_svst1_hor_vnum_za16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[TMP2]], i64 1, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svst1_hor_vnum_za16ju10__SVBool_tPvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[TMP2]], i64 1, i32 [[TILESLICE2]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_hor_vnum_za16(0, slice_base, 0, pg, ptr, vnum); + svst1_hor_vnum_za16(1, slice_base, 7, pg, ptr, vnum); +} + +// +// + +// CHECK-LABEL: @test_svst1_hor_vnum_za32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[TMP2]], i64 3, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svst1_hor_vnum_za32ju10__SVBool_tPvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[TMP2]], i64 3, i32 [[TILESLICE2]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_hor_vnum_za32(0, slice_base, 0, pg, ptr, vnum); + svst1_hor_vnum_za32(3, slice_base, 3, pg, ptr, vnum); +} + +// +// + +// CHECK-LABEL: @test_svst1_hor_vnum_za64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[TMP2]], i64 7, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svst1_hor_vnum_za64ju10__SVBool_tPvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[TMP2]], i64 7, i32 [[TILESLICE2]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_hor_vnum_za64(0, slice_base, 0, pg, ptr, vnum); + svst1_hor_vnum_za64(7, slice_base, 1, pg, ptr, vnum); +} + +// +// + +// CHECK-LABEL: @test_svst1_hor_vnum_za128( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i128, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[TMP2]], i64 15, i32 [[SLICE_BASE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svst1_hor_vnum_za128ju10__SVBool_tPvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i128, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[TMP2]], i64 15, i32 [[SLICE_BASE]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_hor_vnum_za128(0, slice_base, 0, pg, ptr, vnum); + svst1_hor_vnum_za128(15, slice_base, 0, pg, ptr, vnum); +} + +// +// + +// CHECK-LABEL: @test_svst1_ver_vnum_za8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG:%.*]], ptr [[TMP1]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], ptr [[TMP1]], i64 0, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svst1_ver_vnum_za8ju10__SVBool_tPvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG:%.*]], ptr [[TMP1]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], ptr [[TMP1]], i64 0, i32 [[TILESLICE2]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_ver_vnum_za8(0, slice_base, 0, pg, ptr, vnum); + svst1_ver_vnum_za8(0, slice_base, 15, pg, ptr, vnum); +} + +// +// + +// CHECK-LABEL: @test_svst1_ver_vnum_za16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[TMP2]], i64 1, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svst1_ver_vnum_za16ju10__SVBool_tPvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[TMP2]], i64 1, i32 [[TILESLICE2]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_ver_vnum_za16(0, slice_base, 0, pg, ptr, vnum); + svst1_ver_vnum_za16(1, slice_base, 7, pg, ptr, vnum); +} + +// +// + +// CHECK-LABEL: @test_svst1_ver_vnum_za32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[TMP2]], i64 3, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svst1_ver_vnum_za32ju10__SVBool_tPvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[TMP2]], i64 3, i32 [[TILESLICE2]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_ver_vnum_za32(0, slice_base, 0, pg, ptr, vnum); + svst1_ver_vnum_za32(3, slice_base, 3, pg, ptr, vnum); +} + +// +// + +// CHECK-LABEL: @test_svst1_ver_vnum_za64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[TMP2]], i64 7, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svst1_ver_vnum_za64ju10__SVBool_tPvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[TMP2]], i64 7, i32 [[TILESLICE2]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_ver_vnum_za64(0, slice_base, 0, pg, ptr, vnum); + svst1_ver_vnum_za64(7, slice_base, 1, pg, ptr, vnum); +} + +// +// + +// CHECK-LABEL: @test_svst1_ver_vnum_za128( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i128, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[TMP2]], i64 15, i32 [[SLICE_BASE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svst1_ver_vnum_za128ju10__SVBool_tPvl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i128, ptr [[PTR:%.*]], i64 [[MULVL]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[TMP2]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[TMP2]], i64 15, i32 [[SLICE_BASE]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_ver_vnum_za128(0, slice_base, 0, pg, ptr, vnum); + svst1_ver_vnum_za128(15, slice_base, 0, pg, ptr, vnum); +} diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -169,6 +169,11 @@ SmallVector ImmChecks; + /// True if this is an SME intrinsic. + bool IsSMEIntrinsic; + /// Attributes for SME intrinsics. + std::string SMEAttributes; + public: Intrinsic(StringRef Name, StringRef Proto, uint64_t MergeTy, StringRef MergeSuffix, uint64_t MemoryElementTy, StringRef LLVMName, @@ -194,6 +199,10 @@ uint64_t getFlags() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag;} + bool isSMEIntrinsic() const { return IsSMEIntrinsic; } + // Return a comma seperated string of SME attributes. + std::string getSMEAttributes() const { return SMEAttributes; } + ArrayRef getImmChecks() const { return ImmChecks; } /// Return the type string for a BUILTIN() macro in Builtins.def. @@ -334,6 +343,9 @@ /// Emit arm_sve.h. void createHeader(raw_ostream &o); + /// Emit arm_sme.h. + void createSMEHeader(raw_ostream &o); + /// Emit all the __builtin prototypes and code needed by Sema. void createBuiltins(raw_ostream &o); @@ -347,7 +359,9 @@ void createTypeFlags(raw_ostream &o); /// Create intrinsic and add it to \p Out - void createIntrinsic(Record *R, SmallVectorImpl> &Out); + void createIntrinsic(Record *R, + SmallVectorImpl> &Out, + bool IsSME = false); }; } // end anonymous namespace @@ -757,6 +771,11 @@ NumVectors = 0; Signed = true; break; + case '%': + Pointer = true; + Void = true; + NumVectors = 0; + break; case 'A': Pointer = true; ElementBitwidth = Bitwidth = 8; @@ -840,6 +859,18 @@ this->Flags |= Emitter.encodeMergeType(MergeTy); if (hasSplat()) this->Flags |= Emitter.encodeSplatOperand(getSplatIdx()); + + // Set attributes for SME intrinsics. + if (this->Flags & Emitter.getEnumValueForFlag("IsSME")) { + this->IsSMEIntrinsic = true; + if (this->Flags & Emitter.getEnumValueForFlag("IsSMELd1")) + this->SMEAttributes = "arm_streaming, arm_shared_za"; + else if (this->Flags & Emitter.getEnumValueForFlag("IsSMESt1")) + this->SMEAttributes = "arm_streaming, arm_shared_za, arm_preserves_za"; + else + llvm_unreachable("Unknown SME instruction"); + } else + this->IsSMEIntrinsic = false; } std::string Intrinsic::getBuiltinTypeStr() { @@ -926,7 +957,10 @@ OS << (IsOverloaded ? "__aio " : "__ai ") << "__attribute__((__clang_arm_builtin_alias(" - << "__builtin_sve_" << FullName << ")))\n"; + << "__builtin_sve_" << FullName << ")"; + if (isSMEIntrinsic()) + OS << ", " << getSMEAttributes(); + OS << "))\n"; OS << getTypes()[0].str() << " " << ProtoName << "("; for (unsigned I = 0; I < getTypes().size() - 1; ++I) { @@ -989,7 +1023,7 @@ } void SVEEmitter::createIntrinsic( - Record *R, SmallVectorImpl> &Out) { + Record *R, SmallVectorImpl> &Out, bool IsSME) { StringRef Name = R->getValueAsString("Name"); StringRef Proto = R->getValueAsString("Prototype"); StringRef Types = R->getValueAsString("Types"); @@ -1005,6 +1039,9 @@ for (auto FlagRec : FlagsList) Flags |= FlagRec->getValueAsInt("Value"); + bool SMEFlag = Flags & getEnumValueForFlag("IsSME"); + if (SMEFlag != IsSME) + return; // Create a dummy TypeSpec for non-overloaded builtins. if (Types.empty()) { assert((Flags & getEnumValueForFlag("IsOverloadNone")) && @@ -1288,11 +1325,90 @@ OS << "#endif /* __ARM_SVE_H */\n"; } +void SVEEmitter::createSMEHeader(raw_ostream &OS) { + OS << "/*===---- arm_sme.h - ARM SME intrinsics " + "-----------------------------------===\n" + " *\n" + " *\n" + " * Part of the LLVM Project, under the Apache License v2.0 with LLVM " + "Exceptions.\n" + " * See https://llvm.org/LICENSE.txt for license information.\n" + " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n" + " *\n" + " *===-----------------------------------------------------------------" + "------===\n" + " */\n\n"; + + OS << "#ifndef __ARM_SME_H\n"; + OS << "#define __ARM_SME_H\n\n"; + + OS << "#if !defined(__ARM_FEATURE_SME)\n"; + OS << "#error \"SME support not enabled\"\n"; + OS << "#else\n\n"; + + OS << "#include \n\n"; + + OS << "/* Function attributes */\n"; + OS << "#define __ai static __inline__ __attribute__((__always_inline__, " + "__nodebug__))\n\n"; + + OS << "#ifdef __cplusplus\n"; + OS << "extern \"C\" {\n"; + OS << "#endif\n\n"; + + SmallVector, 128> Defs; + std::vector RV = Records.getAllDerivedDefinitions("Inst"); + for (auto *R : RV) + createIntrinsic(R, Defs, true); + + // Sort intrinsics in header file by following order/priority similar to SVE: + // - Architectural guard + // - Class (is intrinsic overloaded or not) + // - Intrinsic name + std::stable_sort(Defs.begin(), Defs.end(), + [](const std::unique_ptr &A, + const std::unique_ptr &B) { + auto ToTuple = [](const std::unique_ptr &I) { + return std::make_tuple(I->getGuard(), + (unsigned)I->getClassKind(), + I->getName()); + }; + return ToTuple(A) < ToTuple(B); + }); + + StringRef InGuard = ""; + for (auto &I : Defs) { + // Emit #endif/#if pair if needed. + if (I->getGuard() != InGuard) { + if (!InGuard.empty()) + OS << "#endif //" << InGuard << "\n"; + InGuard = I->getGuard(); + if (!InGuard.empty()) + OS << "\n#if " << InGuard << "\n"; + } + + // Actually emit the intrinsic declaration. + I->emitIntrinsic(OS); + } + + if (!InGuard.empty()) + OS << "#endif //" << InGuard << "\n"; + + OS << "#ifdef __cplusplus\n"; + OS << "} // extern \"C\"\n"; + OS << "#endif\n\n"; + OS << "#undef __ai\n\n"; + OS << "#endif /*__ARM_FEATURE_SME */\n\n"; + OS << "#endif /* __ARM_SME_H */\n"; +} + void SVEEmitter::createBuiltins(raw_ostream &OS) { std::vector RV = Records.getAllDerivedDefinitions("Inst"); SmallVector, 128> Defs; - for (auto *R : RV) + for (auto *R : RV) { createIntrinsic(R, Defs); + createIntrinsic(R, Defs, true); + } // The mappings must be sorted based on BuiltinID. llvm::sort(Defs, [](const std::unique_ptr &A, @@ -1322,8 +1438,10 @@ void SVEEmitter::createCodeGenMap(raw_ostream &OS) { std::vector RV = Records.getAllDerivedDefinitions("Inst"); SmallVector, 128> Defs; - for (auto *R : RV) + for (auto *R : RV) { createIntrinsic(R, Defs); + createIntrinsic(R, Defs, true); + } // The mappings must be sorted based on BuiltinID. llvm::sort(Defs, [](const std::unique_ptr &A, @@ -1355,8 +1473,10 @@ void SVEEmitter::createRangeChecks(raw_ostream &OS) { std::vector RV = Records.getAllDerivedDefinitions("Inst"); SmallVector, 128> Defs; - for (auto *R : RV) + for (auto *R : RV) { createIntrinsic(R, Defs); + createIntrinsic(R, Defs, true); + } // The mappings must be sorted based on BuiltinID. llvm::sort(Defs, [](const std::unique_ptr &A, @@ -1420,6 +1540,10 @@ SVEEmitter(Records).createHeader(OS); } +void EmitSmeHeader(RecordKeeper &Records, raw_ostream &OS) { + SVEEmitter(Records).createSMEHeader(OS); +} + void EmitSveBuiltins(RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createBuiltins(OS); } diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp --- a/clang/utils/TableGen/TableGen.cpp +++ b/clang/utils/TableGen/TableGen.cpp @@ -81,6 +81,7 @@ GenArmSveBuiltinCG, GenArmSveTypeFlags, GenArmSveRangeChecks, + GenArmSmeHeader, GenArmCdeHeader, GenArmCdeBuiltinDef, GenArmCdeBuiltinSema, @@ -219,6 +220,8 @@ "Generate arm_sve_typeflags.inc for clang"), clEnumValN(GenArmSveRangeChecks, "gen-arm-sve-sema-rangechecks", "Generate arm_sve_sema_rangechecks.inc for clang"), + clEnumValN(GenArmSmeHeader, "gen-arm-sme-header", + "Generate arm_sme.h for clang"), clEnumValN(GenArmMveHeader, "gen-arm-mve-header", "Generate arm_mve.h for clang"), clEnumValN(GenArmMveBuiltinDef, "gen-arm-mve-builtin-def", @@ -438,6 +441,9 @@ case GenArmSveRangeChecks: EmitSveRangeChecks(Records, OS); break; + case GenArmSmeHeader: + EmitSmeHeader(Records, OS); + break; case GenArmCdeHeader: EmitCdeHeader(Records, OS); break; diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h --- a/clang/utils/TableGen/TableGenBackends.h +++ b/clang/utils/TableGen/TableGenBackends.h @@ -101,6 +101,8 @@ void EmitSveTypeFlags(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitSveRangeChecks(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSmeHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); + void EmitMveHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitMveBuiltinDef(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitMveBuiltinSema(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);