Index: clang/lib/Basic/Targets/AArch64.h
===================================================================
--- clang/lib/Basic/Targets/AArch64.h
+++ clang/lib/Basic/Targets/AArch64.h
@@ -87,6 +87,21 @@
   ArrayRef<const char *> getGCCRegNames() const override;
   ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override;
+
+  std::string convertConstraint(const char *&Constraint) const override {
+    std::string R;
+    switch (*Constraint) {
+    case 'U': // Three-character constraint; add "@3" hint for later parsing.
+      R = std::string("@3") + std::string(Constraint, 3);
+      Constraint += 2;
+      break;
+    default:
+      R = std::string(1, *Constraint);
+      break;
+    }
+    return R;
+  }
+
   bool validateAsmConstraint(const char *&Name,
                              TargetInfo::ConstraintInfo &Info) const override;
   bool
Index: clang/lib/Basic/Targets/AArch64.cpp
===================================================================
--- clang/lib/Basic/Targets/AArch64.cpp
+++ clang/lib/Basic/Targets/AArch64.cpp
@@ -486,17 +486,29 @@
     Info.setAllowsRegister();
     return true;
   case 'U':
+    if (Name[1] == 'p' && (Name[2] == 'l' || Name[2] == 'a')) {
+      // SVE predicate registers ("Upa" = P0-P15, "Upl" = P0-P7)
+      Info.setAllowsRegister();
+      Name += 2;
+      return true;
+    }
     // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes.
     // Utf: A memory address suitable for ldp/stp in TF mode.
     // Usa: An absolute symbolic address.
     // Ush: The high part (bits 32:12) of a pc-relative symbolic address.
-    llvm_unreachable("FIXME: Unimplemented support for U* constraints.");
+
+    // Better to return an error saying that it's an unrecognised constraint,
+    // even though this is a valid constraint in GCC.
+    return false;
   case 'z': // Zero register, wzr or xzr
     Info.setAllowsRegister();
     return true;
   case 'x': // Floating point and SIMD registers (V0-V15)
     Info.setAllowsRegister();
     return true;
+  case 'y': // SVE vector registers (Z0-Z7)
+    Info.setAllowsRegister();
+    return true;
   }
   return false;
 }
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -4498,7 +4498,7 @@
   for (unsigned i = 0; i < IRCallArgs.size(); ++i) {
     if (auto *VT = dyn_cast<llvm::VectorType>(IRCallArgs[i]->getType()))
       LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
-                                    VT->getPrimitiveSizeInBits().getFixedSize());
+                                    VT->getPrimitiveSizeInBits().getKnownMinSize());
   }
 
   // Compute the calling convention and attributes.
@@ -4613,7 +4613,7 @@
   // Update largest vector width from the return type.
   if (auto *VT = dyn_cast<llvm::VectorType>(CI->getType()))
     LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
-                                  VT->getPrimitiveSizeInBits().getFixedSize());
+                                  VT->getPrimitiveSizeInBits().getKnownMinSize());
 
   // Insert instrumentation or attach profile metadata at indirect call sites.
   // For more details, see the comment before the definition of
Index: clang/lib/CodeGen/CGStmt.cpp
===================================================================
--- clang/lib/CodeGen/CGStmt.cpp
+++ clang/lib/CodeGen/CGStmt.cpp
@@ -2096,7 +2096,7 @@
       // Update largest vector width for any vector types.
       if (auto *VT = dyn_cast<llvm::VectorType>(ResultRegTypes.back()))
         LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
-                                      VT->getPrimitiveSizeInBits().getFixedSize());
+                                      VT->getPrimitiveSizeInBits().getKnownMinSize());
     } else {
       ArgTypes.push_back(Dest.getAddress(*this).getType());
       Args.push_back(Dest.getPointer(*this));
@@ -2121,7 +2121,7 @@
       // Update largest vector width for any vector types.
       if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType()))
         LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
-                                      VT->getPrimitiveSizeInBits().getFixedSize());
+                                      VT->getPrimitiveSizeInBits().getKnownMinSize());
       if (Info.allowsRegister())
         InOutConstraints += llvm::utostr(i);
       else
@@ -2208,7 +2208,7 @@
       // Update largest vector width for any vector types.
       if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType()))
         LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
-                                      VT->getPrimitiveSizeInBits().getFixedSize());
+                                      VT->getPrimitiveSizeInBits().getKnownMinSize());
       ArgTypes.push_back(Arg->getType());
       Args.push_back(Arg);
Index: clang/lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.cpp
+++ clang/lib/CodeGen/CodeGenFunction.cpp
@@ -496,12 +496,12 @@
   for (llvm::Argument &A : CurFn->args())
     if (auto *VT = dyn_cast<llvm::VectorType>(A.getType()))
       LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
-                                    VT->getPrimitiveSizeInBits().getFixedSize());
+                                    VT->getPrimitiveSizeInBits().getKnownMinSize());
 
   // Update vector width based on return type.
   if (auto *VT = dyn_cast<llvm::VectorType>(CurFn->getReturnType()))
     LargestVectorWidth = std::max((uint64_t)LargestVectorWidth,
-                                  VT->getPrimitiveSizeInBits().getFixedSize());
+                                  VT->getPrimitiveSizeInBits().getKnownMinSize());
 
   // Add the required-vector-width attribute. This contains the max width from:
   // 1. min-vector-width attribute used in the source program.
Index: clang/test/CodeGen/aarch64-sve-inline-asm-crash.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve-inline-asm-crash.c
@@ -0,0 +1,22 @@
+// RUN: not %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns \
+// RUN:   -target-feature +neon -S -O1 -o - %s 2>&1 | FileCheck %s
+
+// Set a vector constraint on an SVE predicate register.
+// Because the wrong constraint is used for an __SVBool_t,
+// the compiler will try to extend the nxv16i1 to an nxv16i8.
+// TODO: We don't have patterns for this yet, but once they are added this
+// test should be updated to check for an assembler error.
+__SVBool_t funcB1(__SVBool_t in)
+{
+  __SVBool_t ret;
+  asm volatile (
+    "mov %[ret].b, %[in].b \n"
+    : [ret] "=w" (ret)
+    : [in] "w" (in)
+    :);
+
+  return ret;
+}
+
+// CHECK: funcB1
+// CHECK-ERROR: fatal error: error in backend: Cannot select
Index: clang/test/CodeGen/aarch64-sve-inline-asm-datatypes.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve-inline-asm-datatypes.c
@@ -0,0 +1,209 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns \
+// RUN:   -target-feature +neon -S -O1 -o - %s | FileCheck %s
+
+// Tests to check that all SVE datatypes can be passed in as input operands
+// and passed out as output operands.
+
+#define SVINT_TEST(DT, KIND)\
+DT func_int_##DT##KIND(DT in)\
+{\
+  DT out;\
+  asm volatile (\
+    "ptrue p0.b\n"\
+    "mov %[out]." #KIND ", p0/m, %[in]." #KIND "\n"\
+    : [out] "=w" (out)\
+    : [in] "w" (in)\
+    : "p0"\
+    );\
+  return out;\
+}
+
+SVINT_TEST(__SVUint8_t,b);
+// CHECK: mov {{z[0-9]+}}.b, p0/m, {{z[0-9]+}}.b
+SVINT_TEST(__SVUint8_t,h);
+// CHECK: mov {{z[0-9]+}}.h, p0/m, {{z[0-9]+}}.h
+SVINT_TEST(__SVUint8_t,s);
+// CHECK: mov {{z[0-9]+}}.s, p0/m, {{z[0-9]+}}.s
+SVINT_TEST(__SVUint8_t,d);
+// CHECK: mov {{z[0-9]+}}.d, p0/m, {{z[0-9]+}}.d
+
+SVINT_TEST(__SVUint16_t,b);
+// CHECK: mov {{z[0-9]+}}.b, p0/m, {{z[0-9]+}}.b
+SVINT_TEST(__SVUint16_t,h);
+// CHECK: mov {{z[0-9]+}}.h, p0/m, {{z[0-9]+}}.h
+SVINT_TEST(__SVUint16_t,s);
+// CHECK: mov {{z[0-9]+}}.s, p0/m, {{z[0-9]+}}.s
+SVINT_TEST(__SVUint16_t,d);
+// CHECK: mov {{z[0-9]+}}.d, p0/m, {{z[0-9]+}}.d
+
+SVINT_TEST(__SVUint32_t,b);
+// CHECK: mov {{z[0-9]+}}.b, p0/m, {{z[0-9]+}}.b
+SVINT_TEST(__SVUint32_t,h);
+// CHECK: mov {{z[0-9]+}}.h, p0/m, {{z[0-9]+}}.h
+SVINT_TEST(__SVUint32_t,s);
+// CHECK: mov {{z[0-9]+}}.s, p0/m, {{z[0-9]+}}.s
+SVINT_TEST(__SVUint32_t,d);
+// CHECK: mov {{z[0-9]+}}.d, p0/m, {{z[0-9]+}}.d
+
+SVINT_TEST(__SVUint64_t,b);
+// CHECK: mov {{z[0-9]+}}.b, p0/m, {{z[0-9]+}}.b
+SVINT_TEST(__SVUint64_t,h);
+// CHECK: mov {{z[0-9]+}}.h, p0/m, {{z[0-9]+}}.h
+SVINT_TEST(__SVUint64_t,s);
+// CHECK: mov {{z[0-9]+}}.s, p0/m, {{z[0-9]+}}.s
+SVINT_TEST(__SVUint64_t,d);
+// CHECK: mov {{z[0-9]+}}.d, p0/m, {{z[0-9]+}}.d
+
+SVINT_TEST(__SVInt8_t,b);
+// CHECK: mov {{z[0-9]+}}.b, p0/m, {{z[0-9]+}}.b
+SVINT_TEST(__SVInt8_t,h);
+// CHECK: mov {{z[0-9]+}}.h, p0/m, {{z[0-9]+}}.h
+SVINT_TEST(__SVInt8_t,s);
+// CHECK: mov {{z[0-9]+}}.s, p0/m, {{z[0-9]+}}.s
+SVINT_TEST(__SVInt8_t,d);
+// CHECK: mov {{z[0-9]+}}.d, p0/m, {{z[0-9]+}}.d
+
+SVINT_TEST(__SVInt16_t,b);
+// CHECK: mov {{z[0-9]+}}.b, p0/m, {{z[0-9]+}}.b
+SVINT_TEST(__SVInt16_t,h);
+// CHECK: mov {{z[0-9]+}}.h, p0/m, {{z[0-9]+}}.h
+SVINT_TEST(__SVInt16_t,s);
+// CHECK: mov {{z[0-9]+}}.s, p0/m, {{z[0-9]+}}.s
+SVINT_TEST(__SVInt16_t,d);
+// CHECK: mov {{z[0-9]+}}.d, p0/m, {{z[0-9]+}}.d
+
+SVINT_TEST(__SVInt32_t,b);
+// CHECK: mov {{z[0-9]+}}.b, p0/m, {{z[0-9]+}}.b
+SVINT_TEST(__SVInt32_t,h);
+// CHECK: mov {{z[0-9]+}}.h, p0/m, {{z[0-9]+}}.h
+SVINT_TEST(__SVInt32_t,s);
+// CHECK: mov {{z[0-9]+}}.s, p0/m, {{z[0-9]+}}.s
+SVINT_TEST(__SVInt32_t,d);
+// CHECK: mov {{z[0-9]+}}.d, p0/m, {{z[0-9]+}}.d
+
+SVINT_TEST(__SVInt64_t,b);
+// CHECK: mov {{z[0-9]+}}.b, p0/m, {{z[0-9]+}}.b
+SVINT_TEST(__SVInt64_t,h);
+// CHECK: mov {{z[0-9]+}}.h, p0/m, {{z[0-9]+}}.h
+SVINT_TEST(__SVInt64_t,s);
+// CHECK: mov {{z[0-9]+}}.s, p0/m, {{z[0-9]+}}.s
+SVINT_TEST(__SVInt64_t,d);
+// CHECK: mov {{z[0-9]+}}.d, p0/m, {{z[0-9]+}}.d
+
+
+// Test that floats can also be used as datatypes for integer instructions,
+// and check all the variants that would not be possible with a float
+// instruction.
+SVINT_TEST(__SVFloat16_t,b);
+// CHECK: mov {{z[0-9]+}}.b, p0/m, {{z[0-9]+}}.b
+SVINT_TEST(__SVFloat16_t,h);
+// CHECK: mov {{z[0-9]+}}.h, p0/m, {{z[0-9]+}}.h
+SVINT_TEST(__SVFloat16_t,s);
+// CHECK: mov {{z[0-9]+}}.s, p0/m, {{z[0-9]+}}.s
+SVINT_TEST(__SVFloat16_t,d);
+// CHECK: mov {{z[0-9]+}}.d, p0/m, {{z[0-9]+}}.d
+
+SVINT_TEST(__SVFloat32_t,b);
+// CHECK: mov {{z[0-9]+}}.b, p0/m, {{z[0-9]+}}.b
+SVINT_TEST(__SVFloat32_t,h);
+// CHECK: mov {{z[0-9]+}}.h, p0/m, {{z[0-9]+}}.h
+SVINT_TEST(__SVFloat32_t,s);
+// CHECK: mov {{z[0-9]+}}.s, p0/m, {{z[0-9]+}}.s
+SVINT_TEST(__SVFloat32_t,d);
+// CHECK: mov {{z[0-9]+}}.d, p0/m, {{z[0-9]+}}.d
+
+SVINT_TEST(__SVFloat64_t,b);
+// CHECK: mov {{z[0-9]+}}.b, p0/m, {{z[0-9]+}}.b
+SVINT_TEST(__SVFloat64_t,h);
+// CHECK: mov {{z[0-9]+}}.h, p0/m, {{z[0-9]+}}.h
+SVINT_TEST(__SVFloat64_t,s);
+// CHECK: mov {{z[0-9]+}}.s, p0/m, {{z[0-9]+}}.s
+SVINT_TEST(__SVFloat64_t,d);
+// CHECK: mov {{z[0-9]+}}.d, p0/m, {{z[0-9]+}}.d
+
+
+#define SVBOOL_TEST(KIND)\
+__SVBool_t func_bool_##KIND(__SVBool_t in1, __SVBool_t in2)\
+{\
+  __SVBool_t out;\
+  asm volatile (\
+    "zip1 %[out]." #KIND ", %[in1]." #KIND ", %[in2]." #KIND "\n"\
+    : [out] "=Upa" (out)\
+    : [in1] "Upa" (in1),\
+      [in2] "Upl" (in2)\
+    :);\
+  return out;\
+}
+
+SVBOOL_TEST(b);
+// CHECK: zip1 {{p[0-9]+}}.b, {{p[0-9]+}}.b, {{p[0-9]+}}.b
+SVBOOL_TEST(h);
+// CHECK: zip1 {{p[0-9]+}}.h, {{p[0-9]+}}.h, {{p[0-9]+}}.h
+SVBOOL_TEST(s);
+// CHECK: zip1 {{p[0-9]+}}.s, {{p[0-9]+}}.s, {{p[0-9]+}}.s
+SVBOOL_TEST(d);
+// CHECK: zip1 {{p[0-9]+}}.d, {{p[0-9]+}}.d, {{p[0-9]+}}.d
+
+
+#define SVFLOAT_TEST(DT,KIND)\
+DT func_float_##DT##KIND(DT inout1, DT in2)\
+{\
+  asm volatile (\
+    "ptrue p0." #KIND ", #1 \n"\
+    "fsub %[inout1]." #KIND ", p0/m, %[inout1]." #KIND ", %[in2]." #KIND "\n"\
+    : [inout1] "=w" (inout1)\
+    : "[inout1]" (inout1),\
+      [in2] "w" (in2)\
+    : "p0");\
+  return inout1;\
+}\
+
+SVFLOAT_TEST(__SVFloat16_t,s);
+// CHECK: fsub {{z[0-9]+}}.s, p0/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
+SVFLOAT_TEST(__SVFloat16_t,d);
+// CHECK: fsub {{z[0-9]+}}.d, p0/m, {{z[0-9]+}}.d, {{z[0-9]+}}.d
+
+SVFLOAT_TEST(__SVFloat32_t,s);
+// CHECK: fsub {{z[0-9]+}}.s, p0/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
+SVFLOAT_TEST(__SVFloat32_t,d);
+// CHECK: fsub {{z[0-9]+}}.d, p0/m, {{z[0-9]+}}.d, {{z[0-9]+}}.d
+
+SVFLOAT_TEST(__SVFloat64_t,s);
+// CHECK: fsub {{z[0-9]+}}.s, p0/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
+SVFLOAT_TEST(__SVFloat64_t,d);
+// CHECK: fsub {{z[0-9]+}}.d, p0/m, {{z[0-9]+}}.d, {{z[0-9]+}}.d
+
+
+// Another test for floats, to include the h suffix
+
+#define SVFLOAT_CVT_TEST(DT1,KIND1,DT2,KIND2)\
+DT1 func_float_cvt_##DT1##KIND1##DT2##KIND2(DT2 in1)\
+{\
+  DT1 out1;\
+  asm volatile (\
+    "ptrue p0." #KIND2 ", #1 \n"\
+    "fcvt %[out1]." #KIND1 ", p0/m, %[in1]." #KIND2 "\n"\
+    : [out1] "=w" (out1)\
+    : [in1] "w" (in1)\
+    : "p0");\
+  return out1;\
+}\
+
+SVFLOAT_CVT_TEST(__SVFloat64_t,d,__SVFloat32_t,s);
+// CHECK: fcvt {{z[0-9]+}}.d, p0/m, {{z[0-9]+}}.s
+SVFLOAT_CVT_TEST(__SVFloat64_t,d,__SVFloat16_t,h);
+// CHECK: fcvt {{z[0-9]+}}.d, p0/m, {{z[0-9]+}}.h
+SVFLOAT_CVT_TEST(__SVFloat32_t,s,__SVFloat16_t,h);
+// CHECK: fcvt {{z[0-9]+}}.s, p0/m, {{z[0-9]+}}.h
+SVFLOAT_CVT_TEST(__SVFloat32_t,s,__SVFloat64_t,d);
+// CHECK: fcvt {{z[0-9]+}}.s, p0/m, {{z[0-9]+}}.d
+SVFLOAT_CVT_TEST(__SVFloat16_t,h,__SVFloat64_t,d);
+// CHECK: fcvt {{z[0-9]+}}.h, p0/m, {{z[0-9]+}}.d
+SVFLOAT_CVT_TEST(__SVFloat16_t,h,__SVFloat32_t,s);
+// CHECK: fcvt {{z[0-9]+}}.h, p0/m, {{z[0-9]+}}.s
+
+// Test a mix of floats and ints
+SVFLOAT_CVT_TEST(__SVInt16_t,h,__SVFloat32_t,s);
+// CHECK: fcvt {{z[0-9]+}}.h, p0/m, {{z[0-9]+}}.s
+SVFLOAT_CVT_TEST(__SVFloat16_t,s,__SVUint32_t,d);
+// CHECK: fcvt {{z[0-9]+}}.s, p0/m, {{z[0-9]+}}.d
Index: clang/test/CodeGen/aarch64-sve-inline-asm-negative-test.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve-inline-asm-negative-test.c
@@ -0,0 +1,19 @@
+// RUN: not %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns \
+// RUN:   -target-feature +neon -S -O1 -o - %s | FileCheck %s
+
+// Assembler error
+// Output constraint: Set a vector constraint on an integer
+__SVFloat32_t funcB2()
+{
+  __SVFloat32_t ret;
+  asm volatile (
+    "fmov %[ret], wzr \n"
+    : [ret] "=w" (ret)
+    :
+    :);
+
+  return ret;
+}
+
+// CHECK: funcB2
+// CHECK-ERROR: error: invalid operand for instruction
Index: clang/test/CodeGen/aarch64-sve-inline-asm-vec-low.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve-inline-asm-vec-low.c
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -W -Wall -o - %s | FileCheck %s
+
+#define svfcmla_test_f16(Zda, Zn, Zm, rot, imm3) \
+  ({ \
+    __SVFloat16_t _Zn = Zn;\
+    __SVFloat16_t _Zm = Zm;\
+    __SVFloat16_t _res = Zda;\
+    __asm__("fcmla %[__res].h, %[__Zn].h, %[__Zm].h[" #imm3 "], %[__rot]"\
+            : [__res] "+w" (_res) \
+            : [__Zn] "w" (_Zn), [__Zm] "y" (_Zm), [__rot] "i" (rot) \
+            : \
+            );\
+    _res; \
+  })
+
+
+// CHECK: fcmla {{z[0-9]+\.h}}, {{z[0-9]+\.h}}, {{z[0-7]\.h}}{{\[[0-9]+\]}}, #270
+__SVFloat16_t test_svfcmla_lane_f16(__SVFloat16_t aZda, __SVFloat16_t aZn, __SVFloat16_t aZm) {
+  return svfcmla_test_f16(aZda, aZn, aZm, 270, 0);
+}
+
+#define svfcmla_test_f32(Zda, Zn, Zm, rot, imm3) \
+  ({ \
+    __SVFloat32_t _Zn = Zn;\
+    __SVFloat32_t _Zm = Zm;\
+    __SVFloat32_t _res = Zda;\
+    __asm__("fcmla %[__res].s, %[__Zn].s, %[__Zm].s[" #imm3 "], %[__rot]"\
+            : [__res] "+w" (_res) \
+            : [__Zn] "w" (_Zn), [__Zm] "x" (_Zm), [__rot] "i" (rot) \
+            : \
+            );\
+    _res; \
+  })
+
+
+// CHECK: fcmla {{z[0-9]+\.s}}, {{z[0-9]+\.s}}, {{z[0-9][0-5]?\.s}}{{\[[0-9]+\]}}, #270
+__SVFloat32_t test_svfcmla_lane_f(__SVFloat32_t aZda, __SVFloat32_t aZn, __SVFloat32_t aZm) {
+  return svfcmla_test_f32(aZda, aZn, aZm, 270, 0);
+}
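
Usage note (reviewer sketch, not part of the patch): the new constraints matter because some SVE encodings reserve fewer bits for an operand than the full register file needs. Below is a minimal sketch of how user code would choose between the constraint classes; the helper names are illustrative only, and it assumes the indexed .s form of FMLA and the predicated form of FADD restrict their operand fields as described. It would be compiled with the same -target-feature +sve flags as the tests above.

// Hypothetical helper: the indexed .s form of FMLA encodes the indexed
// operand in 3 bits, so it must live in Z0-Z7; "y" conveys that, where
// plain "w" would allow any of Z0-Z31.
__SVFloat32_t fmla_lane0(__SVFloat32_t acc, __SVFloat32_t a,
                         __SVFloat32_t b) {
  asm("fmla %[acc].s, %[a].s, %[b].s[0]"
      : [acc] "+w" (acc)
      : [a] "w" (a), [b] "y" (b));
  return acc;
}

// Hypothetical helper: predicated FADD has a 3-bit governing predicate
// field, so the predicate must be one of P0-P7; "Upl" conveys that,
// while "Upa" would allow any of P0-P15.
__SVFloat32_t fadd_masked(__SVBool_t pg, __SVFloat32_t a, __SVFloat32_t b) {
  asm("fadd %[a].s, %[pg]/m, %[a].s, %[b].s"
      : [a] "+w" (a)
      : [pg] "Upl" (pg), [b] "w" (b));
  return a;
}

If "Upa" were used for the FADD predicate and the register allocator picked one of P8-P15, the assembler would reject the instruction, which is the same failure mode the negative test above exercises for vector operands.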