diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -287,6 +287,8 @@
     bool isSMERead() const { return Flags & IsSMERead; }
     bool isSMEWrite() const { return Flags & IsSMEWrite; }
     bool isSMEZero() const { return Flags & IsSMEZero; }
+    bool isSMELdr() const { return Flags & IsSMELdr; }
+    bool isSMEStr() const { return Flags & IsSMEStr; }
 
     uint64_t getBits() const { return Flags; }
     bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -212,6 +212,8 @@
 def IsSMERead : FlagType<0x4000000000>;
 def IsSMEWrite : FlagType<0x8000000000>;
 def IsSMEZero : FlagType<0x10000000000>;
+def IsSMELdr : FlagType<0x20000000000>;
+def IsSMEStr : FlagType<0x40000000000>;
 
 // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
 class ImmCheckType {
@@ -571,6 +573,8 @@
 def SVLD1_VER_VNUM_ZA64 : MInst<"svld1_ver_vnum_za64", "vimiPQl", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1d_vert">;
 def SVLD1_VER_VNUM_ZA128 : MInst<"svld1_ver_vnum_za128", "vimiPQl", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1q_vert">;
 
+def SVLDR_VNUM_ZA : MInst<"svldr_vnum_za", "vmiQ", "", [IsOverloadNone, IsSME, IsSMELdr], MemEltTyDefault, "aarch64_sme_ldr">;
+
 ////////////////////////////////////////////////////////////////////////////////
 // Stores
 
@@ -715,6 +719,8 @@
 def SVST1_VER_VNUM_ZA64 : MInst<"svst1_ver_vnum_za64", "vimiP%l", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1d_vert">;
 def SVST1_VER_VNUM_ZA128 : MInst<"svst1_ver_vnum_za128", "vimiP%l", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1q_vert">;
 
+def SVSTR_VNUM_ZA : MInst<"svstr_vnum_za", "vmi%", "", [IsOverloadNone, IsSME, IsSMEStr], MemEltTyDefault, "aarch64_sme_str">;
+
 ////////////////////////////////////////////////////////////////////////////////
 // Prefetches
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9134,6 +9134,31 @@
   return Builder.CreateCall(F, Ops);
 }
 
+/// Emit the intrinsic call for svldr_vnum_za and svstr_vnum_za.
+///
+/// On entry \p Ops holds {slice base, constant vnum immediate, pointer}. The
+/// pointer is advanced by vnum * 16 * vscale bytes, the immediate operand is
+/// removed, and \p IntID is called with the remaining {slice base, pointer}.
+///
+/// NOTE(review): the immediate only moves the pointer; it is dropped without
+/// being added to the slice base. Confirm this matches the ACLE definition.
+/// NOTE(review): 16 * vscale is presumably the vector length in bytes; confirm
+/// that this is the right quantity when the streaming vector length differs.
+Value *CodeGenFunction::EmitSMELdrStr(SVETypeFlags TypeFlags,
+                                      SmallVectorImpl<Value *> &Ops,
+                                      unsigned IntID) {
+  Function *Vscale = CGM.getIntrinsic(Intrinsic::vscale, Int64Ty);
+  llvm::Value *VscaleCall = Builder.CreateCall(Vscale, {}, "vscale");
+  llvm::Value *MulVL = Builder.CreateMul(
+      VscaleCall,
+      Builder.getInt64(16 * cast<llvm::ConstantInt>(Ops[1])->getZExtValue()),
+      "mulvl");
+  Ops[2] = Builder.CreateGEP(Int8Ty, Ops[2], MulVL);
+  Ops.erase(&Ops[1]);
+  Function *F = CGM.getIntrinsic(IntID, {});
+  return Builder.CreateCall(F, Ops);
+}
+
 // Limit the usage of scalable llvm IR generated by the ACLE by using the
 // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
 Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
@@ -9272,6 +9297,8 @@
     return EmitSMEWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (TypeFlags.isSMEZero())
     return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+  else if (TypeFlags.isSMELdr() || TypeFlags.isSMEStr())
+    return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (TypeFlags.isGatherLoad())
     return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (TypeFlags.isScatterStore())
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4243,6 +4243,9 @@
   llvm::Value *EmitSMEZero(SVETypeFlags TypeFlags,
                            llvm::SmallVectorImpl<llvm::Value *> &Ops,
                            unsigned IntID);
+  llvm::Value *EmitSMELdrStr(SVETypeFlags TypeFlags,
+                             llvm::SmallVectorImpl<llvm::Value *> &Ops,
+                             unsigned IntID);
   llvm::Value *EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
                                    SmallVectorImpl<llvm::Value *> &Ops,
                                    unsigned BuiltinID);
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
@@ -0,0 +1,61 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+// CHECK-LABEL: @test_svldr_vnum_za(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z18test_svldr_vnum_zajPKv(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) {
+  svldr_vnum_za(slice_base, 0, ptr);
+}
+
+// CHECK-LABEL: @test_svldr_vnum_za_1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svldr_vnum_za_1jPKv(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) {
+  svldr_vnum_za(slice_base, 15, ptr);
+}
+
+// CHECK-LABEL: @test_svldr_vnum_za_2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[MULVL:%.*]] = shl nuw nsw i64 [[VSCALE]], 8
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svldr_vnum_za_2jPKv(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[MULVL:%.*]] = shl nuw nsw i64 [[VSCALE]], 8
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svldr_vnum_za_2(uint32_t slice_base, const void *ptr) {
+  svldr_vnum_za(slice_base, 16, ptr);
+}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
@@ -0,0 +1,62 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+
+// CHECK-LABEL: @test_svstr_vnum_za(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z18test_svstr_vnum_zajPv(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstr_vnum_za(uint32_t slice_base, void *ptr) {
+  svstr_vnum_za(slice_base, 0, ptr);
+}
+
+// CHECK-LABEL: @test_svstr_vnum_za_1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svstr_vnum_za_1jPv(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) {
+  svstr_vnum_za(slice_base, 15, ptr);
+}
+
+// CHECK-LABEL: @test_svstr_vnum_za_2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[MULVL:%.*]] = shl nuw nsw i64 [[VSCALE]], 8
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svstr_vnum_za_2jPv(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[MULVL:%.*]] = shl nuw nsw i64 [[VSCALE]], 8
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstr_vnum_za_2(uint32_t slice_base, void *ptr) {
+  svstr_vnum_za(slice_base, 16, ptr);
+}
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -873,6 +873,10 @@
       this->SMEAttributes = "arm_streaming, arm_shared_za";
     else if (this->Flags & Emitter.getEnumValueForFlag("IsSMEZero"))
       this->SMEAttributes = "arm_streaming_compatible, arm_shared_za";
+    else if (this->Flags & Emitter.getEnumValueForFlag("IsSMELdr"))
+      this->SMEAttributes = "arm_streaming_compatible, arm_shared_za";
+    else if (this->Flags & Emitter.getEnumValueForFlag("IsSMEStr"))
+      this->SMEAttributes = "arm_streaming_compatible, arm_shared_za, arm_preserves_za";
     else
       llvm_unreachable("Unknown SME instruction");
   } else