diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -366,6 +366,8 @@
   bool isStore() const { return Flags & IsStore; }
   bool isMove() const { return Flags & IsMove; }
   bool isZero() const { return Flags & IsZero; }
+  bool isLoadReg() const { return Flags & IsLoadReg; }
+  bool isStoreReg() const { return Flags & IsStoreReg; }
   uint64_t getBits() const { return Flags; }
   bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -40,6 +40,8 @@
 def SVLD1_VER_VNUM_ZA64 : MInst<"svld1_ver_vnum_za64", "vimiPQl", "l", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1d_vert">;
 def SVLD1_VER_VNUM_ZA128 : MInst<"svld1_ver_vnum_za128", "vimiPQl", "q", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1q_vert">;
 
+def SVLDR_VNUM_ZA : MInst<"svldr_vnum_za", "vmiQ", "", [IsOverloadNone, IsLoadReg, IsStreamingCompatible, IsSharedZA], MemEltTyDefault, "aarch64_sme_ldr">;
+
 ////////////////////////////////////////////////////////////////////////////////
 // Stores
@@ -65,6 +67,8 @@
 def SVST1_VER_VNUM_ZA64 : MInst<"svst1_ver_vnum_za64", "vimiP%l", "l", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1d_vert">;
 def SVST1_VER_VNUM_ZA128 : MInst<"svst1_ver_vnum_za128", "vimiP%l", "q", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1q_vert">;
 
+def SVSTR_VNUM_ZA : MInst<"svstr_vnum_za", "vmi%", "", [IsOverloadNone, IsStoreReg, IsStreamingCompatible, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_str">;
+
 ////////////////////////////////////////////////////////////////////////////////
 // SME - Read horizontal/vertical ZA slices
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -217,6 +217,8 @@
 def IsPreservesZA : FlagType<0x4000000000>;
 def IsMove : FlagType<0x8000000000>;
 def IsZero : FlagType<0x10000000000>;
+def IsLoadReg : FlagType<0x20000000000>;
+def IsStoreReg : FlagType<0x40000000000>;
 
 // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
 class ImmCheckType<int val> {
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9448,6 +9448,21 @@
   return Builder.CreateCall(F, Ops);
 }
 
+Value *CodeGenFunction::EmitSMELdrStr(SMETypeFlags TypeFlags,
+                                      SmallVectorImpl<Value *> &Ops,
+                                      unsigned IntID) {
+  Function *Vscale = CGM.getIntrinsic(Intrinsic::vscale, Int64Ty);
+  llvm::Value *VscaleCall = Builder.CreateCall(Vscale, {}, "vscale");
+  llvm::Value *MulVL = Builder.CreateMul(
+      VscaleCall,
+      Builder.getInt64(16 * cast<ConstantInt>(Ops[1])->getZExtValue()),
+      "mulvl");
+  Ops[2] = Builder.CreateGEP(Int8Ty, Ops[2], MulVL);
+  Ops.erase(&Ops[1]);
+  Function *F = CGM.getIntrinsic(IntID, {});
+  return Builder.CreateCall(F, Ops);
+}
+
 // Limit the usage of scalable llvm IR generated by the ACLE by using the
 // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
 Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
@@ -9906,6 +9921,8 @@
     return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (TypeFlags.isZero())
     return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+  else if (TypeFlags.isLoadReg() || TypeFlags.isStoreReg())
+    return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
 
   /// Should not happen
   return nullptr;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4259,6 +4259,9 @@
   llvm::Value *EmitSMEZero(SMETypeFlags TypeFlags,
                            llvm::SmallVectorImpl<llvm::Value *> &Ops,
                            unsigned IntID);
+  llvm::Value *EmitSMELdrStr(SMETypeFlags TypeFlags,
+                             llvm::SmallVectorImpl<llvm::Value *> &Ops,
+                             unsigned IntID);
   llvm::Value *EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
 
   llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
@@ -0,0 +1,42 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+// CHECK-C-LABEL: @test_svldr_vnum_za(
+// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zajPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) {
+  svldr_vnum_za(slice_base, 0, ptr);
+}
+
+// CHECK-C-LABEL: @test_svldr_vnum_za_1(
+// CHECK-CXX-LABEL: @_Z20test_svldr_vnum_za_1jPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CHECK-NEXT: ret void
+//
+void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) {
+  svldr_vnum_za(slice_base, 15, ptr);
+}
+
+// CHECK-C-LABEL: @test_svldr_vnum_za_2(
+// CHECK-CXX-LABEL: @_Z20test_svldr_vnum_za_2jPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[MULVL:%.*]] = shl nuw nsw i64 [[VSCALE]], 8
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CHECK-NEXT: ret void
+//
+void test_svldr_vnum_za_2(uint32_t slice_base, const void *ptr) {
+  svldr_vnum_za(slice_base, 16, ptr);
+}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
@@ -0,0 +1,43 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+
+// CHECK-C-LABEL: @test_svstr_vnum_za(
+// CHECK-CXX-LABEL: @_Z18test_svstr_vnum_zajPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svstr_vnum_za(uint32_t slice_base, void *ptr) {
+  svstr_vnum_za(slice_base, 0, ptr);
+}
+
+// CHECK-C-LABEL: @test_svstr_vnum_za_1(
+// CHECK-CXX-LABEL: @_Z20test_svstr_vnum_za_1jPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CHECK-NEXT: ret void
+//
+void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) {
+  svstr_vnum_za(slice_base, 15, ptr);
+}
+
+// CHECK-C-LABEL: @test_svstr_vnum_za_2(
+// CHECK-CXX-LABEL: @_Z20test_svstr_vnum_za_2jPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[MULVL:%.*]] = shl nuw nsw i64 [[VSCALE]], 8
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CHECK-NEXT: ret void
+//
+void test_svstr_vnum_za_2(uint32_t slice_base, void *ptr) {
+  svstr_vnum_za(slice_base, 16, ptr);
+}
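
Note (not part of the patch): both new builtins lower to the @llvm.aarch64.sme.ldr / @llvm.aarch64.sme.str intrinsics with the base pointer pre-offset in EmitSMELdrStr by vnum * 16 * vscale bytes, i.e. one streaming vector length (SVL) per vnum step, and the codegen folds vnum via cast<ConstantInt>, so it must be a compile-time constant. The sketch below only illustrates that lowering; it assumes the same header and compile options as the tests above, and the function name za_round_trip and the buffer buf are illustrative, with buf assumed to hold at least 4 * SVL bytes for the offset used here.

// Illustrative usage sketch, mirroring the tests above (not part of the patch).
#include <arm_sme_draft_spec_subject_to_change.h>

void za_round_trip(uint32_t slice_base, void *buf) {
  // Lowers to @llvm.aarch64.sme.str with the pointer advanced by 3 * SVL bytes.
  svstr_vnum_za(slice_base, 3, buf);
  // Lowers to @llvm.aarch64.sme.ldr with the same pointer offset, reloading
  // the bytes just stored; vnum must be a constant expression.
  svldr_vnum_za(slice_base, 3, buf);
}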