diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -62,6 +62,9 @@
 BUILTIN(__builtin_arm_stg, "vv*", "t")
 BUILTIN(__builtin_arm_subp, "Uiv*v*", "t")
 
+// Memory Operations
+BUILTIN(__builtin_arm_mops_memset_tag, "v*v*iz", "")
+
 // Memory barrier
 BUILTIN(__builtin_arm_dmb, "vUi", "nc")
 BUILTIN(__builtin_arm_dsb, "vUi", "nc")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9777,6 +9777,18 @@
     return Builder.CreateCall(F, {Arg0, Arg1});
   }
 
+  // Memory Operations (MOPS)
+  if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
+    Value *Dst = EmitScalarExpr(E->getArg(0));
+    Value *Val = EmitScalarExpr(E->getArg(1));
+    Value *Size = EmitScalarExpr(E->getArg(2));
+    Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
+    Val = Builder.CreateTrunc(Val, Int8Ty);
+    Size = Builder.CreateIntCast(Size, Int64Ty, false);
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
+  }
+
   // Memory Tagging Extensions (MTE) Intrinsics
   Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
   switch (BuiltinID) {
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -730,6 +730,12 @@
 #define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
 #endif
 
+/* Memory Operations Intrinsics */
+#if __ARM_FEATURE_MOPS && __ARM_FEATURE_MEMORY_TAGGING
+#define __arm_mops_memset_tag(__tagged_address, __value, __size) \
+  __builtin_arm_mops_memset_tag(__tagged_address, __value, __size)
+#endif
+
 /* Transactional Memory Extension (TME) Intrinsics */
 #if __ARM_FEATURE_TME
 
diff --git a/clang/test/CodeGen/aarch64-mops.c b/clang/test/CodeGen/aarch64-mops.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-mops.c
@@ -0,0 +1,153 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// RUN: %clang_cc1 -triple aarch64-arm-unknown-eabi -target-feature +mops -target-feature +mte -S -emit-llvm -o - %s | FileCheck %s
+
+#define __ARM_FEATURE_MOPS 1
+#include <arm_acle.h>
+#include <stddef.h>
+
+// CHECK-LABEL: @bzero_0(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 0)
+// CHECK-NEXT:    ret i8* [[TMP1]]
+//
+void *bzero_0(void *dst) {
+  return __arm_mops_memset_tag(dst, 0, 0);
+}
+
+// CHECK-LABEL: @bzero_1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 1)
+// CHECK-NEXT:    ret i8* [[TMP1]]
+//
+void *bzero_1(void *dst) {
+  return __arm_mops_memset_tag(dst, 0, 1);
+}
+
+// CHECK-LABEL: @bzero_10(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10)
+// CHECK-NEXT:    ret i8* [[TMP1]]
+//
+void *bzero_10(void *dst) {
+  return __arm_mops_memset_tag(dst, 0, 10);
+}
+
+// CHECK-LABEL: @bzero_10000(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10000)
+// CHECK-NEXT:    ret i8* [[TMP1]]
+//
+void *bzero_10000(void *dst) {
+  return __arm_mops_memset_tag(dst, 0, 10000);
+}
+
+// CHECK-LABEL: @bzero_n(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[SIZE_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 [[TMP1]])
+// CHECK-NEXT:    ret i8* [[TMP2]]
+//
+void *bzero_n(void *dst, size_t size) {
+  return __arm_mops_memset_tag(dst, 0, size);
+}
+
+// CHECK-LABEL: @memset_0(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-NEXT:    [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 0)
+// CHECK-NEXT:    ret i8* [[TMP3]]
+//
+void *memset_0(void *dst, int value) {
+  return __arm_mops_memset_tag(dst, value, 0);
+}
+
+// CHECK-LABEL: @memset_1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-NEXT:    [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 1)
+// CHECK-NEXT:    ret i8* [[TMP3]]
+//
+void *memset_1(void *dst, int value) {
+  return __arm_mops_memset_tag(dst, value, 1);
+}
+
+// CHECK-LABEL: @memset_10(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-NEXT:    [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10)
+// CHECK-NEXT:    ret i8* [[TMP3]]
+//
+void *memset_10(void *dst, int value) {
+  return __arm_mops_memset_tag(dst, value, 10);
+}
+
+// CHECK-LABEL: @memset_10000(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-NEXT:    [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10000)
+// CHECK-NEXT:    ret i8* [[TMP3]]
+//
+void *memset_10000(void *dst, int value) {
+  return __arm_mops_memset_tag(dst, value, 10000);
+}
+
+// CHECK-LABEL: @memset_n(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[SIZE_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-NEXT:    [[TMP4:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP3]], i64 [[TMP2]])
+// CHECK-NEXT:    ret i8* [[TMP4]]
+//
+void *memset_n(void *dst, int value, size_t size) {
+  return __arm_mops_memset_tag(dst, value, size);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -897,6 +897,14 @@
                 [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>;
 }
 
+//===----------------------------------------------------------------------===//
+// Memory Operations (MOPS) Intrinsics
+let TargetPrefix = "aarch64" in {
+  // Sizes are chosen to correspond to the llvm.memset intrinsic: ptr, i8, i64
+  def int_aarch64_mops_memset_tag : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i8_ty, llvm_i64_ty],
+      [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>;
+}
+
 // Transactional Memory Extension (TME) Intrinsics
 let TargetPrefix = "aarch64" in {
   def int_aarch64_tstart  : GCCBuiltin<"__builtin_arm_tstart">,