diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -226,6 +226,8 @@ void AArch64TargetInfo::getTargetDefinesARMV88A(const LangOptions &Opts, MacroBuilder &Builder) const { + // FIXME: this does not handle the case where MOPS is disabled using +nomops + Builder.defineMacro("__ARM_FEATURE_MOPS", "1"); // Also include the Armv8.7 defines getTargetDefinesARMV87A(Opts, Builder); } @@ -435,6 +437,9 @@ if (HasRandGen) Builder.defineMacro("__ARM_FEATURE_RNG", "1"); + if (HasMOPS) + Builder.defineMacro("__ARM_FEATURE_MOPS", "1"); + switch (ArchKind) { default: break; @@ -662,6 +667,8 @@ HasFlagM = true; if (Feature == "+hbc") HasHBC = true; + if (Feature == "+mops") + HasMOPS = true; } setDataLayout(); diff --git a/clang/test/CodeGen/aarch64-mops.c b/clang/test/CodeGen/aarch64-mops.c --- a/clang/test/CodeGen/aarch64-mops.c +++ b/clang/test/CodeGen/aarch64-mops.c @@ -1,152 +1,277 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple aarch64-arm-unknown-eabi -target-feature +mops -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-arm-unknown-eabi -target-feature +mops -S -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-MOPS %s +// RUN: %clang_cc1 -triple aarch64-arm-unknown-eabi -S -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-NOMOPS %s +// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.7-a+mops -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-MOPS %s +// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.7-a -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-NOMOPS %s +// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.8-a+mops -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-MOPS %s +// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.8-a -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-MOPS %s -#define __ARM_FEATURE_MOPS 
1 #include <arm_acle.h> #include <stddef.h> -// CHECK-LABEL: @bzero_0( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 -// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 0) -// CHECK-NEXT: ret i8* [[TMP1]] +// CHECK-MOPS-LABEL: @bzero_0( +// CHECK-MOPS: entry: +// CHECK-MOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-MOPS-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 0) +// CHECK-MOPS-NEXT: ret i8* [[TMP1]] +// +// CHECK-NOMOPS-LABEL: @bzero_0( +// CHECK-NOMOPS: entry: +// CHECK-NOMOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NOMOPS-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i32)*)(i8* noundef [[TMP0]], i32 noundef 0, i32 noundef 0) +// CHECK-NOMOPS-NEXT: [[CONV:%.*]] = sext i32 [[CALL]] to i64 +// CHECK-NOMOPS-NEXT: [[TMP1:%.*]] = inttoptr i64 [[CONV]] to i8* +// CHECK-NOMOPS-NEXT: ret i8* [[TMP1]] // void *bzero_0(void *dst) { return __arm_mops_memset_tag(dst, 0, 0); } -// CHECK-LABEL: @bzero_1( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 -// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 1) -// CHECK-NEXT: ret i8* [[TMP1]] +// CHECK-MOPS-LABEL: @bzero_1( +// CHECK-MOPS: entry: +// CHECK-MOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-MOPS-NEXT: store i8* [[DST:%.*]], 
i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 1) +// CHECK-MOPS-NEXT: ret i8* [[TMP1]] +// +// CHECK-NOMOPS-LABEL: @bzero_1( +// CHECK-NOMOPS: entry: +// CHECK-NOMOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NOMOPS-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i32)*)(i8* noundef [[TMP0]], i32 noundef 0, i32 noundef 1) +// CHECK-NOMOPS-NEXT: [[CONV:%.*]] = sext i32 [[CALL]] to i64 +// CHECK-NOMOPS-NEXT: [[TMP1:%.*]] = inttoptr i64 [[CONV]] to i8* +// CHECK-NOMOPS-NEXT: ret i8* [[TMP1]] // void *bzero_1(void *dst) { return __arm_mops_memset_tag(dst, 0, 1); } -// CHECK-LABEL: @bzero_10( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 -// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10) -// CHECK-NEXT: ret i8* [[TMP1]] +// CHECK-MOPS-LABEL: @bzero_10( +// CHECK-MOPS: entry: +// CHECK-MOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-MOPS-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10) +// CHECK-MOPS-NEXT: ret i8* [[TMP1]] +// +// CHECK-NOMOPS-LABEL: @bzero_10( +// CHECK-NOMOPS: entry: +// CHECK-NOMOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NOMOPS-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: 
[[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i32)*)(i8* noundef [[TMP0]], i32 noundef 0, i32 noundef 10) +// CHECK-NOMOPS-NEXT: [[CONV:%.*]] = sext i32 [[CALL]] to i64 +// CHECK-NOMOPS-NEXT: [[TMP1:%.*]] = inttoptr i64 [[CONV]] to i8* +// CHECK-NOMOPS-NEXT: ret i8* [[TMP1]] // void *bzero_10(void *dst) { return __arm_mops_memset_tag(dst, 0, 10); } -// CHECK-LABEL: @bzero_10000( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 -// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10000) -// CHECK-NEXT: ret i8* [[TMP1]] +// CHECK-MOPS-LABEL: @bzero_10000( +// CHECK-MOPS: entry: +// CHECK-MOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-MOPS-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10000) +// CHECK-MOPS-NEXT: ret i8* [[TMP1]] +// +// CHECK-NOMOPS-LABEL: @bzero_10000( +// CHECK-NOMOPS: entry: +// CHECK-NOMOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NOMOPS-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i32)*)(i8* noundef [[TMP0]], i32 noundef 0, i32 noundef 10000) +// CHECK-NOMOPS-NEXT: [[CONV:%.*]] = sext i32 [[CALL]] to i64 +// CHECK-NOMOPS-NEXT: [[TMP1:%.*]] = inttoptr i64 [[CONV]] to i8* +// CHECK-NOMOPS-NEXT: ret i8* [[TMP1]] // void *bzero_10000(void *dst) { return __arm_mops_memset_tag(dst, 0, 10000); } -// CHECK-LABEL: @bzero_n( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 -// CHECK-NEXT: 
[[SIZE_ADDR:%.*]] = alloca i64, align 8 -// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 [[TMP1]]) -// CHECK-NEXT: ret i8* [[TMP2]] +// CHECK-MOPS-LABEL: @bzero_n( +// CHECK-MOPS: entry: +// CHECK-MOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-MOPS-NEXT: [[SIZE_ADDR:%.*]] = alloca i64, align 8 +// CHECK-MOPS-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8 +// CHECK-MOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: [[TMP1:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8 +// CHECK-MOPS-NEXT: [[TMP2:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 [[TMP1]]) +// CHECK-MOPS-NEXT: ret i8* [[TMP2]] +// +// CHECK-NOMOPS-LABEL: @bzero_n( +// CHECK-NOMOPS: entry: +// CHECK-NOMOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NOMOPS-NEXT: [[SIZE_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NOMOPS-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: [[TMP1:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i64)*)(i8* noundef [[TMP0]], i32 noundef 0, i64 noundef [[TMP1]]) +// CHECK-NOMOPS-NEXT: [[CONV:%.*]] = sext i32 [[CALL]] to i64 +// CHECK-NOMOPS-NEXT: [[TMP2:%.*]] = inttoptr i64 [[CONV]] to i8* +// CHECK-NOMOPS-NEXT: ret i8* [[TMP2]] // void *bzero_n(void *dst, size_t size) { return __arm_mops_memset_tag(dst, 0, size); } -// CHECK-LABEL: 
@memset_0( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 -// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 0) -// CHECK-NEXT: ret i8* [[TMP3]] +// CHECK-MOPS-LABEL: @memset_0( +// CHECK-MOPS: entry: +// CHECK-MOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-MOPS-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-MOPS-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 +// CHECK-MOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 +// CHECK-MOPS-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 +// CHECK-MOPS-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 0) +// CHECK-MOPS-NEXT: ret i8* [[TMP3]] +// +// CHECK-NOMOPS-LABEL: @memset_0( +// CHECK-NOMOPS: entry: +// CHECK-NOMOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NOMOPS-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NOMOPS-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 +// CHECK-NOMOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 +// CHECK-NOMOPS-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i32)*)(i8* noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 0) +// CHECK-NOMOPS-NEXT: [[CONV:%.*]] = 
sext i32 [[CALL]] to i64 +// CHECK-NOMOPS-NEXT: [[TMP2:%.*]] = inttoptr i64 [[CONV]] to i8* +// CHECK-NOMOPS-NEXT: ret i8* [[TMP2]] // void *memset_0(void *dst, int value) { return __arm_mops_memset_tag(dst, value, 0); } -// CHECK-LABEL: @memset_1( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 -// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 1) -// CHECK-NEXT: ret i8* [[TMP3]] +// CHECK-MOPS-LABEL: @memset_1( +// CHECK-MOPS: entry: +// CHECK-MOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-MOPS-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-MOPS-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 +// CHECK-MOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 +// CHECK-MOPS-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 +// CHECK-MOPS-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 1) +// CHECK-MOPS-NEXT: ret i8* [[TMP3]] +// +// CHECK-NOMOPS-LABEL: @memset_1( +// CHECK-NOMOPS: entry: +// CHECK-NOMOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NOMOPS-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NOMOPS-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 +// CHECK-NOMOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: [[TMP1:%.*]] = load i32, i32* 
[[VALUE_ADDR]], align 4 +// CHECK-NOMOPS-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i32)*)(i8* noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 1) +// CHECK-NOMOPS-NEXT: [[CONV:%.*]] = sext i32 [[CALL]] to i64 +// CHECK-NOMOPS-NEXT: [[TMP2:%.*]] = inttoptr i64 [[CONV]] to i8* +// CHECK-NOMOPS-NEXT: ret i8* [[TMP2]] // void *memset_1(void *dst, int value) { return __arm_mops_memset_tag(dst, value, 1); } -// CHECK-LABEL: @memset_10( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 -// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10) -// CHECK-NEXT: ret i8* [[TMP3]] +// CHECK-MOPS-LABEL: @memset_10( +// CHECK-MOPS: entry: +// CHECK-MOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-MOPS-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-MOPS-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 +// CHECK-MOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 +// CHECK-MOPS-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 +// CHECK-MOPS-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10) +// CHECK-MOPS-NEXT: ret i8* [[TMP3]] +// +// CHECK-NOMOPS-LABEL: @memset_10( +// CHECK-NOMOPS: entry: +// CHECK-NOMOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NOMOPS-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NOMOPS-NEXT: store i8* 
[[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 +// CHECK-NOMOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 +// CHECK-NOMOPS-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i32)*)(i8* noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 10) +// CHECK-NOMOPS-NEXT: [[CONV:%.*]] = sext i32 [[CALL]] to i64 +// CHECK-NOMOPS-NEXT: [[TMP2:%.*]] = inttoptr i64 [[CONV]] to i8* +// CHECK-NOMOPS-NEXT: ret i8* [[TMP2]] // void *memset_10(void *dst, int value) { return __arm_mops_memset_tag(dst, value, 10); } -// CHECK-LABEL: @memset_10000( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 -// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10000) -// CHECK-NEXT: ret i8* [[TMP3]] +// CHECK-MOPS-LABEL: @memset_10000( +// CHECK-MOPS: entry: +// CHECK-MOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-MOPS-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-MOPS-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 +// CHECK-MOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 +// CHECK-MOPS-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 +// CHECK-MOPS-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10000) +// 
CHECK-MOPS-NEXT: ret i8* [[TMP3]] +// +// CHECK-NOMOPS-LABEL: @memset_10000( +// CHECK-NOMOPS: entry: +// CHECK-NOMOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NOMOPS-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NOMOPS-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 +// CHECK-NOMOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 +// CHECK-NOMOPS-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i32)*)(i8* noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 10000) +// CHECK-NOMOPS-NEXT: [[CONV:%.*]] = sext i32 [[CALL]] to i64 +// CHECK-NOMOPS-NEXT: [[TMP2:%.*]] = inttoptr i64 [[CONV]] to i8* +// CHECK-NOMOPS-NEXT: ret i8* [[TMP2]] // void *memset_10000(void *dst, int value) { return __arm_mops_memset_tag(dst, value, 10000); } -// CHECK-LABEL: @memset_n( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 -// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[SIZE_ADDR:%.*]] = alloca i64, align 8 -// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 -// CHECK-NEXT: store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[TMP4:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP3]], i64 [[TMP2]]) -// CHECK-NEXT: ret i8* [[TMP4]] +// CHECK-MOPS-LABEL: @memset_n( +// CHECK-MOPS: entry: +// CHECK-MOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-MOPS-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// 
CHECK-MOPS-NEXT: [[SIZE_ADDR:%.*]] = alloca i64, align 8 +// CHECK-MOPS-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 +// CHECK-MOPS-NEXT: store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8 +// CHECK-MOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-MOPS-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 +// CHECK-MOPS-NEXT: [[TMP2:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8 +// CHECK-MOPS-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 +// CHECK-MOPS-NEXT: [[TMP4:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP3]], i64 [[TMP2]]) +// CHECK-MOPS-NEXT: ret i8* [[TMP4]] +// +// CHECK-NOMOPS-LABEL: @memset_n( +// CHECK-NOMOPS: entry: +// CHECK-NOMOPS-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NOMOPS-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NOMOPS-NEXT: [[SIZE_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NOMOPS-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 +// CHECK-NOMOPS-NEXT: store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 +// CHECK-NOMOPS-NEXT: [[TMP2:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8 +// CHECK-NOMOPS-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i64)*)(i8* noundef [[TMP0]], i32 noundef [[TMP1]], i64 noundef [[TMP2]]) +// CHECK-NOMOPS-NEXT: [[CONV:%.*]] = sext i32 [[CALL]] to i64 +// CHECK-NOMOPS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[CONV]] to i8* +// CHECK-NOMOPS-NEXT: ret i8* [[TMP3]] // void *memset_n(void *dst, int value, size_t size) { return __arm_mops_memset_tag(dst, value, size); diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c --- 
a/clang/test/Preprocessor/aarch64-target-features.c +++ b/clang/test/Preprocessor/aarch64-target-features.c @@ -516,3 +516,15 @@ // RUN: %clang -target aarch64-none-linux-gnu -march=armv8.1-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-LSE %s // RUN: %clang -target arm64-none-linux-gnu -march=armv8.1-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-LSE %s // CHECK-LSE: __ARM_FEATURE_ATOMICS 1 + +// ================== Check Armv8.8-A/Armv9.3-A memcpy and memset acceleration instructions (MOPS) +// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.7-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-NOMOPS %s +// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.7-a+mops -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-MOPS %s +// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.8-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-MOPS %s +// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.8-a+mops -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-MOPS %s +// RUN: %clang -target aarch64-arm-none-eabi -march=armv9.2-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-NOMOPS %s +// RUN: %clang -target aarch64-arm-none-eabi -march=armv9.2-a+mops -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-MOPS %s +// RUN: %clang -target aarch64-arm-none-eabi -march=armv9.3-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-MOPS %s +// RUN: %clang -target aarch64-arm-none-eabi -march=armv9.3-a+mops -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-MOPS %s +// CHECK-MOPS: __ARM_FEATURE_MOPS 1 +// CHECK-NOMOPS-NOT: __ARM_FEATURE_MOPS 1