diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1869,4 +1869,7 @@ def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [], []>; def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>; +def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [], [ImmCheck<1, ImmCheck0_3>]>; +def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [], [ImmCheck<1, ImmCheck0_1>]>; + } diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td --- a/clang/include/clang/Basic/arm_sve_sme_incl.td +++ b/clang/include/clang/Basic/arm_sve_sme_incl.td @@ -61,7 +61,8 @@ // ------------------- // prototype: return (arg, arg, ...) // -// 2,3,4: array of default vectors +// 2,3,4: array of vectors +// .: indicator for multi-vector modifier that will follow (e.g. 2.x) // v: void // x: vector of signed integers // u: vector of unsigned integers diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9832,6 +9832,41 @@ return Call; } +Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) { + // Multi-vector results should be broken up into a single (wide) result + // vector. + auto *StructTy = dyn_cast(Call->getType()); + if (!StructTy) + return Call; + + auto *VTy = dyn_cast(StructTy->getTypeAtIndex(0U)); + if (!VTy) + return Call; + unsigned N = StructTy->getNumElements(); + + // We may need to emit a cast to a svbool_t + bool IsPredTy = VTy->getElementType()->isIntegerTy(1); + unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements(); + + ScalableVectorType *WideVTy = + ScalableVectorType::get(VTy->getElementType(), MinElts * N); + Value *Ret = llvm::PoisonValue::get(WideVTy); + for (unsigned I = 0; I < N; ++I) { + Value *SRet = Builder.CreateExtractValue(Call, I); + assert(SRet->getType() == VTy && "Unexpected type for result value"); + Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); + + if (IsPredTy) + SRet = EmitSVEPredicateCast( + SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16)); + + Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx); + } + Call = Ret; + + return Call; +} + Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { // Find out if any arguments are required to be integer constant expressions. @@ -9945,7 +9980,7 @@ if (PredTy->getScalarType()->isIntegerTy(1)) Call = EmitSVEPredicateCast(Call, cast(Ty)); - return Call; + return FormSVEBuiltinResult(Call); } switch (BuiltinID) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4293,6 +4293,11 @@ llvm::Value *EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID); + /// FormSVEBuiltinResult - Returns the struct of scalable vectors as a wider + /// vector. It extracts the scalable vector from the struct and inserts into + /// the wider vector. This avoids the error when allocating space in llvm + /// for struct of scalable vectors if a function returns struct. + llvm::Value *FormSVEBuiltinResult(llvm::Value *Call); llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitSMELd1St1(const SVETypeFlags &TypeFlags, diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c @@ -0,0 +1,152 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +#include + +// CHECK-LABEL: @test_svpext_lane_c8_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z21test_svpext_lane_c8_0u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbool_t test_svpext_lane_c8_0(svcount_t c) { + return svpext_lane_c8(c, 0); +} + +// CHECK-LABEL: @test_svpext_lane_c8_3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 3) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z21test_svpext_lane_c8_3u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 3) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbool_t test_svpext_lane_c8_3(svcount_t c) { + return svpext_lane_c8(c, 3); +} + +// CHECK-LABEL: @test_svpext_lane_c16_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c16_0u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpext_lane_c16_0(svcount_t c) { + return svpext_lane_c16(c, 0); +} + +// CHECK-LABEL: @test_svpext_lane_c16_3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 3) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c16_3u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 3) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpext_lane_c16_3(svcount_t c) { + return svpext_lane_c16(c, 3); +} + +// CHECK-LABEL: @test_svpext_lane_c32_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c32_0u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpext_lane_c32_0(svcount_t c) { + return svpext_lane_c32(c, 0); +} + +// CHECK-LABEL: @test_svpext_lane_c32_3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 3) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c32_3u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 3) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpext_lane_c32_3(svcount_t c) { + return svpext_lane_c32(c, 3); +} + +// CHECK-LABEL: @test_svpext_lane_c64_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c64_0u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpext_lane_c64_0(svcount_t c) { + return svpext_lane_c64(c, 0); +} + +// CHECK-LABEL: @test_svpext_lane_c64_3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 3) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c64_3u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 3) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpext_lane_c64_3(svcount_t c) { + return svpext_lane_c64(c, 3); +} + +// CHECK-LABEL: @test_svpext_lane_c8_x2_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z24test_svpext_lane_c8_x2_0u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svboolx2_t test_svpext_lane_c8_x2_0(svcount_t c) { + return svpext_lane_c8_x2(c, 0); +} diff --git a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp --- a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp +++ b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp @@ -3,6 +3,29 @@ // REQUIRES: aarch14-registered-target #include +void test_svpext_lane_imm_0_3(svcount_t c) { + svpext_lane_c8(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svpext_lane_c16(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svpext_lane_c32(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svpext_lane_c64(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + + svpext_lane_c8(c, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + svpext_lane_c16(c, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + svpext_lane_c32(c, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + svpext_lane_c64(c, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +} + +void test_svpext_lane_x2_imm_0_1(svcount_t c) { + svpext_lane_c8_x2(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svpext_lane_c16_x2(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svpext_lane_c32_x2(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svpext_lane_c64_x2(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + + svpext_lane_c8_x2(c, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + svpext_lane_c16_x2(c, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + svpext_lane_c32_x2(c, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + svpext_lane_c64_x2(c, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} +} void test_cntp(svcount_t c) { svcntp_c8(c, 1); // expected-error {{argument value 1 is outside the valid range [2, 4]}} diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -73,12 +73,12 @@ public: SVEType() : SVEType(TypeSpec(), 'v') {} - SVEType(TypeSpec TS, char CharMod) + SVEType(TypeSpec TS, char CharMod, unsigned NumVectors = 1) : TS(TS), Float(false), Signed(true), Immediate(false), Void(false), Constant(false), Pointer(false), BFloat(false), DefaultType(false), IsScalable(true), Predicate(false), PredicatePattern(false), PrefetchOp(false), Svcount(false), Bitwidth(128), ElementBitwidth(~0U), - NumVectors(1) { + NumVectors(NumVectors) { if (!TS.empty()) applyTypespec(); applyModifier(CharMod); @@ -194,7 +194,9 @@ SVEType getReturnType() const { return Types[0]; } ArrayRef getTypes() const { return Types; } SVEType getParamType(unsigned I) const { return Types[I + 1]; } - unsigned getNumParams() const { return Proto.size() - 1; } + unsigned getNumParams() const { + return Proto.size() - (2 * std::count(Proto.begin(), Proto.end(), '.')) - 1; + } uint64_t getFlags() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag;} @@ -228,11 +230,19 @@ /// Return the parameter index of the splat operand. unsigned getSplatIdx() const { - // These prototype modifiers are described in arm_sve.td. - auto Idx = Proto.find_first_of("ajfrKLR@"); - assert(Idx != std::string::npos && Idx > 0 && - "Prototype has no splat operand"); - return Idx - 1; + unsigned I = 1, Param = 0; + for (; I < Proto.size(); ++I, ++Param) { + if (Proto[I] == 'a' || Proto[I] == 'j' || Proto[I] == 'f' || + Proto[I] == 'r' || Proto[I] == 'K' || Proto[I] == 'L' || + Proto[I] == 'R' || Proto[I] == '@') + break; + + // Multivector modifier can be skipped + if (Proto[I] == '.') + I += 2; + } + assert(I != Proto.size() && "Prototype has no splat operand"); + return Param; } /// Emits the intrinsic declaration to the ostream. @@ -540,15 +550,6 @@ void SVEType::applyModifier(char Mod) { switch (Mod) { - case '2': - NumVectors = 2; - break; - case '3': - NumVectors = 3; - break; - case '4': - NumVectors = 4; - break; case 'v': Void = true; break; @@ -859,11 +860,36 @@ Float = false; BFloat = false; break; + case '.': + llvm_unreachable(". is never a type in itself"); + break; default: llvm_unreachable("Unhandled character!"); } } +/// Returns the modifier and number of vectors for the given operand \p Op. +std::pair getProtoModifier(StringRef Proto, unsigned Op) { + for (unsigned P = 0; !Proto.empty(); ++P) { + unsigned NumVectors = 1; + unsigned CharsToSkip = 1; + char Mod = Proto[0]; + if (Mod == '2' || Mod == '3' || Mod == '4') { + NumVectors = Mod - '0'; + Mod = 'd'; + if (Proto.size() > 1 && Proto[1] == '.') { + Mod = Proto[2]; + CharsToSkip = 3; + } + } + + if (P == Op) + return {Mod, NumVectors}; + + Proto = Proto.drop_front(CharsToSkip); + } + llvm_unreachable("Unexpected Op"); +} //===----------------------------------------------------------------------===// // Intrinsic implementation @@ -879,8 +905,11 @@ MergeSuffix(MergeSuffix.str()), BaseType(BT, 'd'), Flags(Flags), ImmChecks(Checks.begin(), Checks.end()) { // Types[0] is the return value. - for (unsigned I = 0; I < Proto.size(); ++I) { - SVEType T(BaseTypeSpec, Proto[I]); + for (unsigned I = 0; I < (getNumParams() + 1); ++I) { + char Mod; + unsigned NumVectors; + std::tie(Mod, NumVectors) = getProtoModifier(Proto, I); + SVEType T(BaseTypeSpec, Mod, NumVectors); Types.push_back(T); // Add range checks for immediates @@ -1124,10 +1153,11 @@ assert(Arg >= 0 && Kind >= 0 && "Arg and Kind must be nonnegative"); unsigned ElementSizeInBits = 0; + char Mod; + unsigned NumVectors; + std::tie(Mod, NumVectors) = getProtoModifier(Proto, EltSizeArg + 1); if (EltSizeArg >= 0) - ElementSizeInBits = - SVEType(TS, Proto[EltSizeArg + /* offset by return arg */ 1]) - .getElementSizeInBits(); + ElementSizeInBits = SVEType(TS, Mod, NumVectors).getElementSizeInBits(); ImmChecks.push_back(ImmCheck(Arg, Kind, ElementSizeInBits)); }