diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -377,8 +377,10 @@
   Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
   Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
 
-  if (Opts.ArmSveVectorBits)
+  if (Opts.ArmSveVectorBits) {
     Builder.defineMacro("__ARM_FEATURE_SVE_BITS", Twine(Opts.ArmSveVectorBits));
+    Builder.defineMacro("__ARM_FEATURE_SVE_VECTOR_OPERATORS");
+  }
 }
 
 ArrayRef<Builtin::Info> AArch64TargetInfo::getTargetBuiltins() const {
diff --git a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
@@ -0,0 +1,117 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s -msve-vector-bits=128 | FileCheck %s -D#VBITS=128 --check-prefixes=CHECK128
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s -msve-vector-bits=256 | FileCheck %s -D#VBITS=256 --check-prefixes=CHECK,CHECK256
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s -msve-vector-bits=512 | FileCheck %s -D#VBITS=512 --check-prefixes=CHECK,CHECK512
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s -msve-vector-bits=1024 | FileCheck %s -D#VBITS=1024 --check-prefixes=CHECK,CHECK1024
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s -msve-vector-bits=2048 | FileCheck %s -D#VBITS=2048 --check-prefixes=CHECK,CHECK2048
+// REQUIRES: aarch64-registered-target
+
+// Examples taken from section "3.7.3.3 Behavior specific to SVE
+// vectors" of the SVE ACLE (Version 00bet6) that can be found at
+// https://developer.arm.com/documentation/100987/latest
+//
+// Example has been expanded to work with multiple values of
+// -msve-vector-bits.
+
+#include <arm_sve.h>
+
+// Page 27, item 1
+#if __ARM_FEATURE_SVE_BITS == 256 && __ARM_FEATURE_SVE_VECTOR_OPERATORS
+// CHECK256-LABEL: @x256 = local_unnamed_addr global <4 x i64> <i64 0, i64 1, i64 2, i64 3>, align 16
+typedef svint64_t vec256 __attribute__((arm_sve_vector_bits(256)));
+vec256 x256 = {0, 1, 2, 3};
+#endif
+
+#if __ARM_FEATURE_SVE_BITS == 512 && __ARM_FEATURE_SVE_VECTOR_OPERATORS
+// CHECK512-LABEL: @x512 = local_unnamed_addr global <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 3, i64 2, i64 1, i64 0>, align 16
+typedef svint64_t vec512 __attribute__((arm_sve_vector_bits(512)));
+vec512 x512 = {0, 1, 2, 3, 3 , 2 , 1, 0};
+#endif
+
+#if __ARM_FEATURE_SVE_BITS == 1024 && __ARM_FEATURE_SVE_VECTOR_OPERATORS
+// CHECK1024-LABEL: @x1024 = local_unnamed_addr global <16 x i64> <i64 0, i64 1, i64 2, i64 3, i64 3, i64 2, i64 1, i64 0, i64 0, i64 1, i64 2, i64 3, i64 3, i64 2, i64 1, i64 0>, align 16
+typedef svint64_t vec1024 __attribute__((arm_sve_vector_bits(1024)));
+vec1024 x1024 = {0, 1, 2, 3, 3 , 2 , 1, 0, 0, 1, 2, 3, 3 , 2 , 1, 0};
+#endif
+
+#if __ARM_FEATURE_SVE_BITS == 2048 && __ARM_FEATURE_SVE_VECTOR_OPERATORS
+// CHECK2048-LABEL: @x2048 = local_unnamed_addr global <32 x i64> <i64 0, i64 1, i64 2, i64 3, i64 3, i64 2, i64 1, i64 0, i64 0, i64 1, i64 2, i64 3, i64 3, i64 2, i64 1, i64 0, i64 0, i64 1, i64 2, i64 3, i64 3, i64 2, i64 1, i64 0, i64 0, i64 1, i64 2, i64 3, i64 3, i64 2, i64 1, i64 0>, align 16
+typedef svint64_t vec2048 __attribute__((arm_sve_vector_bits(2048)));
+vec2048 x2048 = {0, 1, 2, 3, 3 , 2 , 1, 0, 0, 1, 2, 3, 3 , 2 , 1, 0,
+                 0, 1, 2, 3, 3 , 2 , 1, 0, 0, 1, 2, 3, 3 , 2 , 1, 0};
+#endif
+
+// Page 27, item 2. We can not change the ABI of existing vector
+// types, including vec_int8. That's why in the SVE ACLE, VLST is
+// distinct from, but mostly interchangeable with, the corresponding
+// GNUT. VLST is treated for ABI purposes like an SVE type but GNUT
+// continues to be a normal GNU vector type, with base Armv8-A PCS
+// rules.
+#if __ARM_FEATURE_SVE_BITS && __ARM_FEATURE_SVE_VECTOR_OPERATORS
+#define N __ARM_FEATURE_SVE_BITS
+typedef int8_t vec_int8 __attribute__((vector_size(N / 8)));
+// CHECK128-LABEL: define <16 x i8> @f2(<16 x i8> %x)
+// CHECK128-NEXT: entry:
+// CHECK128-NEXT: %x.addr = alloca <16 x i8>, align 16
+// CHECK128-NEXT: %saved-call-rvalue = alloca <vscale x 16 x i8>, align 16
+// CHECK128-NEXT: store <16 x i8> %x, <16 x i8>* %x.addr, align 16
+// CHECK128-NEXT: %0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+// CHECK128-NEXT: %1 = bitcast <16 x i8>* %x.addr to <vscale x 16 x i8>*
+// CHECK128-NEXT: %2 = load <vscale x 16 x i8>, <vscale x 16 x i8>* %1, align 16
+// CHECK128-NEXT: %3 = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %2, i32 1)
+// CHECK128-NEXT: store <vscale x 16 x i8> %3, <vscale x 16 x i8>* %saved-call-rvalue, align 16
+// CHECK128-NEXT: %castFixedSve = bitcast <vscale x 16 x i8>* %saved-call-rvalue to <16 x i8>*
+// CHECK128-NEXT: %4 = load <16 x i8>, <16 x i8>* %castFixedSve, align 16
+// CHECK128-NEXT: ret <16 x i8> %4
+
+// CHECK-LABEL: define void @f2(
+// CHECK-SAME: <[[#div(VBITS,8)]] x i8>* noalias nocapture sret(<[[#div(VBITS,8)]] x i8>) align 16 %agg.result, <[[#div(VBITS,8)]] x i8>* nocapture readonly %0)
+// CHECK-NEXT: entry:
+// CHECK-NEXT: %x.addr = alloca <[[#div(VBITS,8)]] x i8>, align 16
+// CHECK-NEXT: %saved-call-rvalue = alloca <vscale x 16 x i8>, align 16
+// CHECK-NEXT: %x = load <[[#div(VBITS,8)]] x i8>, <[[#div(VBITS,8)]] x i8>* %0, align 16
+// CHECK-NEXT: store <[[#div(VBITS,8)]] x i8> %x, <[[#div(VBITS,8)]] x i8>* %x.addr, align 16
+// CHECK-NEXT: %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+// CHECK-NEXT: %2 = bitcast <[[#div(VBITS,8)]] x i8>* %x.addr to <vscale x 16 x i8>*
+// CHECK-NEXT: %3 = load <vscale x 16 x i8>, <vscale x 16 x i8>* %2, align 16
+// CHECK-NEXT: %4 = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %3, i32 1)
+// CHECK-NEXT: store <vscale x 16 x i8> %4, <vscale x 16 x i8>* %saved-call-rvalue, align 16
+// CHECK-NEXT: %castFixedSve = bitcast <vscale x 16 x i8>* %saved-call-rvalue to <[[#div(VBITS,8)]] x i8>*
+// CHECK-NEXT: %5 = load <[[#div(VBITS,8)]] x i8>, <[[#div(VBITS,8)]] x i8>* %castFixedSve, align 16
+// CHECK-NEXT: store <[[#div(VBITS,8)]] x i8> %5, <[[#div(VBITS,8)]] x i8>* %agg.result, align 16
+// CHECK-NEXT: ret void
+vec_int8 f2(vec_int8 x) { return svasrd_x(svptrue_b8(), x, 1); }
+#endif
+
+// Page 27, item 3.
+#if __ARM_FEATURE_SVE_BITS && __ARM_FEATURE_SVE_VECTOR_OPERATORS
+#define N __ARM_FEATURE_SVE_BITS
+typedef int8_t vec1 __attribute__((vector_size(N / 8)));
+void f3(vec1);
+typedef svint8_t vec2 __attribute__((arm_sve_vector_bits(N)));
+
+// CHECK128-LABEL: define void @g(<vscale x 16 x i8> %x.coerce)
+// CHECK128-NEXT: entry:
+// CHECK128-NEXT: %x = alloca <16 x i8>, align 16
+// CHECK128-NEXT: %0 = bitcast <16 x i8>* %x to <vscale x 16 x i8>*
+// CHECK128-NEXT: store <vscale x 16 x i8> %x.coerce, <vscale x 16 x i8>* %0, align 16
+// CHECK128-NEXT: %x1 = load <16 x i8>, <16 x i8>* %x, align 16,
+// CHECK128-NEXT: call void @f3(<16 x i8> %x1) #4
+// CHECK128-NEXT: ret void
+
+// CHECK-LABEL: define void @g(<vscale x 16 x i8> %x.coerce)
+// CHECK-NEXT: entry:
+// CHECK-NEXT: %x = alloca <[[#div(VBITS,8)]] x i8>, align 16
+// CHECK-NEXT: %indirect-arg-temp = alloca <[[#div(VBITS,8)]] x i8>, align 16
+// CHECK-NEXT: %0 = bitcast <[[#div(VBITS,8)]] x i8>* %x to <vscale x 16 x i8>*
+// CHECK-NEXT: store <vscale x 16 x i8> %x.coerce, <vscale x 16 x i8>* %0
+// CHECK-NEXT: %x1 = load <[[#div(VBITS,8)]] x i8>, <[[#div(VBITS,8)]] x i8>* %x, align 16
+// CHECK-NEXT: store <[[#div(VBITS,8)]] x i8> %x1, <[[#div(VBITS,8)]] x i8>* %indirect-arg-temp
+// CHECK-NEXT: call void @f3(<[[#div(VBITS,8)]] x i8>* nonnull %indirect-arg-temp)
+// CHECK-NEXT: ret void
+
+// CHECK128-LABEL: declare void @f3(<16 x i8>)
+
+// CHECK-LABEL: declare void @f3(
+// CHECK-SAME: <[[#div(VBITS,8)]] x i8>*)
+void g(vec2 x) { f3(x); } // OK
+#endif
diff --git a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
@@ -0,0 +1,94 @@
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s -msve-vector-bits=128 | FileCheck %s -D#VBITS=128 --check-prefixes=CHECK,CHECK128
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s -msve-vector-bits=256 | FileCheck %s -D#VBITS=256 --check-prefixes=CHECK,CHECKWIDE
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s -msve-vector-bits=512 | FileCheck %s -D#VBITS=512 --check-prefixes=CHECK,CHECKWIDE
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s -msve-vector-bits=1024 | FileCheck %s -D#VBITS=1024 --check-prefixes=CHECK,CHECKWIDE
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s -msve-vector-bits=2048 | FileCheck %s -D#VBITS=2048 --check-prefixes=CHECK,CHECKWIDE
+// REQUIRES: aarch64-registered-target
+
+// Examples taken from section "3.7.3.3 Behavior specific to SVE
+// vectors" of the SVE ACLE (Version 00bet6) that can be found at
+// https://developer.arm.com/documentation/100987/latest
+//
+// Example has been expanded to work with multiple values of
+// -msve-vector-bits.
+
+#include <arm_sve.h>
+
+// Page 26, first paragraph of 3.7.3.3: sizeof and alignof
+#if __ARM_FEATURE_SVE_BITS
+#define N __ARM_FEATURE_SVE_BITS
+typedef svfloat32_t fixed_svfloat __attribute__((arm_sve_vector_bits(N)));
+void test01() {
+  static_assert(alignof(fixed_svfloat) == 16,
+                "Invalid align of Vector Length Specific Type.");
+  static_assert(sizeof(fixed_svfloat) == N / 8,
+                "Invalid size of Vector Length Specific Type.");
+}
+#endif
+
+// Page 26, items 1 and 2 of 3.7.3.3: how VLST and GNUT are related.
+#if __ARM_FEATURE_SVE_BITS && __ARM_FEATURE_SVE_VECTOR_OPERATORS
+#define N __ARM_FEATURE_SVE_BITS
+typedef svfloat64_t fixed_svfloat64 __attribute__((arm_sve_vector_bits(N)));
+typedef float64_t gnufloat64 __attribute__((vector_size(N / 8)));
+void test02() {
+  static_assert(alignof(fixed_svfloat64) == alignof(gnufloat64),
+                "Align of Vector Length Specific Type and GNU Vector Types "
+                "should be the same.");
+  static_assert(sizeof(fixed_svfloat64) == sizeof(gnufloat64),
+                "Size of Vector Length Specific Type and GNU Vector Types "
+                "should be the same.");
+}
+#endif
+
+// Page 27, item 1.
+#if __ARM_FEATURE_SVE_BITS && __ARM_FEATURE_SVE_VECTOR_OPERATORS
+#define N __ARM_FEATURE_SVE_BITS
+// CHECK-LABEL: define <vscale x 4 x i32> @_Z1f9__SVE_VLSIu11__SVInt32_tLj
+// CHECK-SAME: [[#VBITS]]
+// CHECK-SAME: EES_(<vscale x 4 x i32> %x.coerce, <vscale x 4 x i32> %y.coerce)
+// CHECK-NEXT: entry:
+// CHECK-NEXT: %x = alloca <[[#div(VBITS,32)]] x i32>, align 16
+// CHECK-NEXT: %y = alloca <[[#div(VBITS,32)]] x i32>, align 16
+// CHECK-NEXT: %retval.coerce = alloca <vscale x 4 x i32>, align 16
+// CHECK-NEXT: %0 = bitcast <[[#div(VBITS,32)]] x i32>* %x to <vscale x 4 x i32>*
+// CHECK-NEXT: store <vscale x 4 x i32> %x.coerce, <vscale x 4 x i32>* %0, align 16
+// CHECK-NEXT: %x1 = load <[[#div(VBITS,32)]] x i32>, <[[#div(VBITS,32)]] x i32>* %x, align 16
+// CHECK-NEXT: %1 = bitcast <[[#div(VBITS,32)]] x i32>* %y to <vscale x 4 x i32>*
+// CHECK-NEXT: store <vscale x 4 x i32> %y.coerce, <vscale x 4 x i32>* %1, align 16
+// CHECK-NEXT: %y2 = load <[[#div(VBITS,32)]] x i32>, <[[#div(VBITS,32)]] x i32>* %y, align 16
+// CHECK-NEXT: %add = add <[[#div(VBITS,32)]] x i32> %y2, %x1
+// CHECK-NEXT: %retval.0..sroa_cast = bitcast <vscale x 4 x i32>* %retval.coerce to <[[#div(VBITS,32)]] x i32>*
+// CHECK-NEXT: store <[[#div(VBITS,32)]] x i32> %add, <[[#div(VBITS,32)]] x i32>* %retval.0..sroa_cast, align 16
+// CHECK-NEXT: %2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %retval.coerce, align 16
+// CHECK-NEXT: ret <vscale x 4 x i32> %2
+typedef svint32_t vec __attribute__((arm_sve_vector_bits(N)));
+auto f(vec x, vec y) { return x + y; } // Returns a vec.
+#endif
+
+// Page 27, item 3, adapted for a generic value of __ARM_FEATURE_SVE_BITS
+#if __ARM_FEATURE_SVE_BITS && __ARM_FEATURE_SVE_VECTOR_OPERATORS
+#define N __ARM_FEATURE_SVE_BITS
+typedef int16_t vec1 __attribute__((vector_size(N / 8)));
+void f(vec1);
+typedef svint16_t vec2 __attribute__((arm_sve_vector_bits(N)));
+// CHECK-LABEL: define void @_Z1g9__SVE_VLSIu11__SVInt16_tLj
+// CHECK-SAME: [[#VBITS]]
+// CHECK-SAME: EE(<vscale x 8 x i16> %x.coerce)
+// CHECK-NEXT: entry:
+// CHECK128-NEXT: %x = alloca <[[#div(VBITS,16)]] x i16>, align 16
+// CHECK128-NEXT: %0 = bitcast <[[#div(VBITS,16)]] x i16>* %x to <vscale x 8 x i16>*
+// CHECK128-NEXT: store <vscale x 8 x i16> %x.coerce, <vscale x 8 x i16>* %0, align 16
+// CHECK128-NEXT: %x1 = load <[[#div(VBITS,16)]] x i16>, <[[#div(VBITS,16)]] x i16>* %x, align 16
+// CHECK128-NEXT: call void @_Z1fDv[[#div(VBITS,16)]]_s(<[[#div(VBITS,16)]] x i16> %x1)
+// CHECK128-NEXT: ret void
+// CHECKWIDE-NEXT: %x = alloca <[[#div(VBITS,16)]] x i16>, align 16
+// CHECKWIDE-NEXT: %indirect-arg-temp = alloca <[[#div(VBITS,16)]] x i16>, align 16
+// CHECKWIDE-NEXT: %0 = bitcast <[[#div(VBITS,16)]] x i16>* %x to <vscale x 8 x i16>*
+// CHECKWIDE-NEXT: store <vscale x 8 x i16> %x.coerce, <vscale x 8 x i16>* %0, align 16
+// CHECKWIDE-NEXT: %x1 = load <[[#div(VBITS,16)]] x i16>, <[[#div(VBITS,16)]] x i16>* %x, align 16
+// CHECKWIDE-NEXT: store <[[#div(VBITS,16)]] x i16> %x1, <[[#div(VBITS,16)]] x i16>* %indirect-arg-temp, align 16
+// CHECKWIDE-NEXT: call void @_Z1fDv[[#div(VBITS,16)]]_s(<[[#div(VBITS,16)]] x i16>* nonnull %indirect-arg-temp)
+// CHECKWIDE-NEXT: ret void
+void g(vec2 x) { f(x); } // OK
+#endif
diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c
--- a/clang/test/Preprocessor/aarch64-target-features.c
+++ b/clang/test/Preprocessor/aarch64-target-features.c
@@ -440,14 +440,10 @@
 // CHECK-BFLOAT: __ARM_FEATURE_BF16_VECTOR_ARITHMETIC 1
 
 // ================== Check sve-vector-bits flag.
-// RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=128 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS-128 %s
-// RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=256 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS-256 %s
-// RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=512 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS-512 %s
-// RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=1024 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS-1024 %s
-// RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=2048 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS-2048 %s
-// RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=2048 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS-2048 %s
-// CHECK-SVE-VECTOR-BITS-128: __ARM_FEATURE_SVE_BITS 128
-// CHECK-SVE-VECTOR-BITS-256: __ARM_FEATURE_SVE_BITS 256
-// CHECK-SVE-VECTOR-BITS-512: __ARM_FEATURE_SVE_BITS 512
-// CHECK-SVE-VECTOR-BITS-1024: __ARM_FEATURE_SVE_BITS 1024
-// CHECK-SVE-VECTOR-BITS-2048: __ARM_FEATURE_SVE_BITS 2048
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=128 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS -D#VBITS=128 %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=256 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS -D#VBITS=256 %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=512 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS -D#VBITS=512 %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=1024 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS -D#VBITS=1024 %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=2048 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS -D#VBITS=2048 %s
+// CHECK-SVE-VECTOR-BITS: __ARM_FEATURE_SVE_BITS [[#VBITS:]]
+// CHECK-SVE-VECTOR-BITS: __ARM_FEATURE_SVE_VECTOR_OPERATORS 1