Index: clang/include/clang/Basic/TargetBuiltins.h =================================================================== --- clang/include/clang/Basic/TargetBuiltins.h +++ clang/include/clang/Basic/TargetBuiltins.h @@ -193,6 +193,12 @@ } bool isLoad() const { return Flags & IsLoad; } + bool isStore() const { return Flags & IsStore; } + bool isGatherLoad() const { return Flags & IsGatherLoad; } + bool isScatterStore() const { return Flags & IsScatterStore; } + bool isStructLoad() const { return Flags & IsStructLoad; } + bool isStructStore() const { return Flags & IsStructStore; } + bool isZExtReturn() const { return Flags & IsZExtReturn; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; } Index: clang/include/clang/Basic/arm_sve.td =================================================================== --- clang/include/clang/Basic/arm_sve.td +++ clang/include/clang/Basic/arm_sve.td @@ -62,6 +62,28 @@ // c: const pointer type // P: predicate type +// l: int64_t + +// A: pointer to int8_t +// B: pointer to int16_t +// C: pointer to int32_t +// D: pointer to int64_t + +// E: pointer to uint8_t +// F: pointer to uint16_t +// G: pointer to uint32_t +// H: pointer to uint64_t + +// S: const pointer to int8_t +// T: const pointer to int16_t +// U: const pointer to int32_t +// V: const pointer to int64_t +// +// W: const pointer to uint8_t +// X: const pointer to uint16_t +// Y: const pointer to uint32_t +// Z: const pointer to uint64_t + class MergeType<int val> { int Value = val; } @@ -113,6 +135,12 @@ // : : def MemEltTypeMask : FlagType<0x00000070>; def IsLoad : FlagType<0x00000080>; +def IsStore : FlagType<0x00000100>; +def IsGatherLoad : FlagType<0x00000200>; +def IsScatterStore : FlagType<0x00000400>; +def IsStructLoad : FlagType<0x00000800>; +def IsStructStore : FlagType<0x00001000>; +def IsZExtReturn : FlagType<0x00002000>; // Return value is sign-extended by default // Every intrinsic subclasses Inst. 
class Inst<string n, string p, string t, MergeType mt, string i, @@ -137,3 +165,88 @@ // Load one vector (scalar base) def SVLD1 : MInst<"svld1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad]>; +def SVLD1SB : MInst<"svld1sb_{d}", "dPS", "silUsUiUl", [IsLoad], MemEltTyInt8>; +def SVLD1UB : MInst<"svld1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8>; +def SVLD1SH : MInst<"svld1sh_{d}", "dPT", "ilUiUl", [IsLoad], MemEltTyInt16>; +def SVLD1UH : MInst<"svld1uh_{d}", "dPX", "ilUiUl", [IsLoad, IsZExtReturn], MemEltTyInt16>; +def SVLD1SW : MInst<"svld1sw_{d}", "dPU", "lUl", [IsLoad], MemEltTyInt32>; +def SVLD1UW : MInst<"svld1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32>; + +// Load one vector (scalar base, VL displacement) +def SVLD1_VNUM : MInst<"svld1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad]>; +def SVLD1SB_VNUM : MInst<"svld1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad], MemEltTyInt8>; +def SVLD1UB_VNUM : MInst<"svld1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8>; +def SVLD1SH_VNUM : MInst<"svld1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad], MemEltTyInt16>; +def SVLD1UH_VNUM : MInst<"svld1uh_vnum_{d}", "dPXl", "ilUiUl", [IsLoad, IsZExtReturn], MemEltTyInt16>; +def SVLD1SW_VNUM : MInst<"svld1sw_vnum_{d}", "dPUl", "lUl", [IsLoad], MemEltTyInt32>; +def SVLD1UW_VNUM : MInst<"svld1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32>; + +// First-faulting load one vector (scalar base) +def SVLDFF1 : MInst<"svldff1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">; +def SVLDFF1SB : MInst<"svldff1sb_{d}", "dPS", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldff1">; +def SVLDFF1UB : MInst<"svldff1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldff1">; +def SVLDFF1SH : MInst<"svldff1sh_{d}", "dPT", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldff1">; +def SVLDFF1UH : MInst<"svldff1uh_{d}", "dPX", "ilUiUl", [IsLoad, IsZExtReturn], 
MemEltTyInt16, "aarch64_sve_ldff1">; +def SVLDFF1SW : MInst<"svldff1sw_{d}", "dPU", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ldff1">; +def SVLDFF1UW : MInst<"svldff1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldff1">; + +// First-faulting load one vector (scalar base, VL displacement) +def SVLDFF1_VNUM : MInst<"svldff1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">; +def SVLDFF1SB_VNUM : MInst<"svldff1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldff1">; +def SVLDFF1UB_VNUM : MInst<"svldff1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldff1">; +def SVLDFF1SH_VNUM : MInst<"svldff1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldff1">; +def SVLDFF1UH_VNUM : MInst<"svldff1uh_vnum_{d}", "dPXl", "ilUiUl", [IsLoad, IsZExtReturn], MemEltTyInt16, "aarch64_sve_ldff1">; +def SVLDFF1SW_VNUM : MInst<"svldff1sw_vnum_{d}", "dPUl", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ldff1">; +def SVLDFF1UW_VNUM : MInst<"svldff1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldff1">; + +// Non-faulting load one vector (scalar base) +def SVLDNF1 : MInst<"svldnf1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">; +def SVLDNF1SB : MInst<"svldnf1sb_{d}", "dPS", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldnf1">; +def SVLDNF1UB : MInst<"svldnf1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldnf1">; +def SVLDNF1SH : MInst<"svldnf1sh_{d}", "dPT", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldnf1">; +def SVLDNF1UH : MInst<"svldnf1uh_{d}", "dPX", "ilUiUl", [IsLoad, IsZExtReturn], MemEltTyInt16, "aarch64_sve_ldnf1">; +def SVLDNF1SW : MInst<"svldnf1sw_{d}", "dPU", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ldnf1">; +def SVLDNF1UW : MInst<"svldnf1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, 
"aarch64_sve_ldnf1">; + +// Non-faulting load one vector (scalar base, VL displacement) +def SVLDNF1_VNUM : MInst<"svldnf1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">; +def SVLDNF1SB_VNUM : MInst<"svldnf1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldnf1">; +def SVLDNF1UB_VNUM : MInst<"svldnf1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldnf1">; +def SVLDNF1SH_VNUM : MInst<"svldnf1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldnf1">; +def SVLDNF1UH_VNUM : MInst<"svldnf1uh_vnum_{d}", "dPXl", "ilUiUl", [IsLoad, IsZExtReturn], MemEltTyInt16, "aarch64_sve_ldnf1">; +def SVLDNF1SW_VNUM : MInst<"svldnf1sw_vnum_{d}", "dPUl", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ldnf1">; +def SVLDNF1UW_VNUM : MInst<"svldnf1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldnf1">; + +// Load one vector, unextended load, non-temporal (scalar base) +def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnt1">; + +// Load one vector, unextended load, non-temporal (scalar base, VL displacement) +def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnt1">; + + +//////////////////////////////////////////////////////////////////////////////// +// Stores + +// Store one vector (scalar base) +def SVST1 : MInst<"svst1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore]>; +def SVST1B_S : MInst<"svst1b[_{d}]", "vPAd", "sil", [IsStore], MemEltTyInt8>; +def SVST1B_U : MInst<"svst1b[_{d}]", "vPEd", "UsUiUl", [IsStore], MemEltTyInt8>; +def SVST1H_S : MInst<"svst1h[_{d}]", "vPBd", "il", [IsStore], MemEltTyInt16>; +def SVST1H_U : MInst<"svst1h[_{d}]", "vPFd", "UiUl", [IsStore], MemEltTyInt16>; +def SVST1W_S : MInst<"svst1w[_{d}]", "vPCd", "l", [IsStore], MemEltTyInt32>; +def SVST1W_U : MInst<"svst1w[_{d}]", "vPGd", "Ul", [IsStore], 
MemEltTyInt32>; + +// Store one vector (scalar base, VL displacement) +def SVST1_VNUM : MInst<"svst1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore]>; +def SVST1B_VNUM_S : MInst<"svst1b_vnum[_{d}]", "vPAld", "sil", [IsStore], MemEltTyInt8>; +def SVST1B_VNUM_U : MInst<"svst1b_vnum[_{d}]", "vPEld", "UsUiUl", [IsStore], MemEltTyInt8>; +def SVST1H_VNUM_S : MInst<"svst1h_vnum[_{d}]", "vPBld", "il", [IsStore], MemEltTyInt16>; +def SVST1H_VNUM_U : MInst<"svst1h_vnum[_{d}]", "vPFld", "UiUl", [IsStore], MemEltTyInt16>; +def SVST1W_VNUM_S : MInst<"svst1w_vnum[_{d}]", "vPCld", "l", [IsStore], MemEltTyInt32>; +def SVST1W_VNUM_U : MInst<"svst1w_vnum[_{d}]", "vPGld", "Ul", [IsStore], MemEltTyInt32>; + +// Store one vector, with no truncation, non-temporal (scalar base) +def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">; + +// Store one vector, with no truncation, non-temporal (scalar base, VL displacement) +def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">; Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -7464,10 +7464,14 @@ return C; } -Value *CodeGenFunction::EmitSVEMaskedLoad(llvm::Type *ReturnTy, - SmallVectorImpl<Value *> &Ops) { - llvm::PointerType *PTy = cast<llvm::PointerType>(Ops[1]->getType()); - llvm::Type *MemEltTy = PTy->getPointerElementType(); +Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E, + llvm::Type *ReturnTy, + SmallVectorImpl<Value *> &Ops, + unsigned BuiltinID, + bool IsZExtReturn) { + QualType LangPTy = E->getArg(1)->getType(); + llvm::Type *MemEltTy = CGM.getTypes().ConvertType( + LangPTy->castAs<PointerType>()->getPointeeType()); // The vector type that is returned may be different from the // eventual type loaded from memory. 
@@ -7475,13 +7479,54 @@ auto MemoryTy = llvm::VectorType::get(MemEltTy, VectorTy->getVectorElementCount()); - Value *Offset = Builder.getInt32(0); Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy); Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo()); + Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0); BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Offset); Value *Splat0 = Constant::getNullValue(MemoryTy); - return Builder.CreateMaskedLoad(BasePtr, Align(1), Predicate, Splat0); + + Value *Load = nullptr; + if (!BuiltinID) + // No SVE intrinsic ID was given, so emit a generic llvm.masked.load. + Load = Builder.CreateMaskedLoad(BasePtr, llvm::Align(1), Predicate, Splat0); + else { + BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo()); + Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy); + Load = Builder.CreateCall(F, {Predicate, BasePtr}); + } + + return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy) + : Builder.CreateSExt(Load, VectorTy); +} + +Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, + SmallVectorImpl<Value *> &Ops, + unsigned BuiltinID) { + QualType LangPTy = E->getArg(1)->getType(); + llvm::Type *MemEltTy = CGM.getTypes().ConvertType( + LangPTy->castAs<PointerType>()->getPointeeType()); + + // The vector type of the data operand may be wider than the + // vector type that is actually written to memory. + auto VectorTy = cast<llvm::VectorType>(Ops.back()->getType()); + auto MemoryTy = + llvm::VectorType::get(MemEltTy, VectorTy->getVectorElementCount()); + + Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy); + Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo()); + Value *Offset = Ops.size() == 4 ? 
Ops[2] : Builder.getInt32(0); + BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Offset); + + // The last operand is always the data; truncate it to the memory type. + llvm::Value *Val = Builder.CreateTrunc(Ops.back(), MemoryTy); + + if (!BuiltinID) + return Builder.CreateMaskedStore(Val, BasePtr, llvm::Align(1), Predicate); + + BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo()); + Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy); + return Builder.CreateCall(F, {Val, Predicate, BasePtr}); } Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, @@ -7505,7 +7550,10 @@ SVETypeFlags TypeFlags(Builtin->TypeModifier); llvm::Type *Ty = ConvertType(E->getType()); if (TypeFlags.isLoad()) - return EmitSVEMaskedLoad(Ty, Ops); + return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic, + TypeFlags.isZExtReturn()); + else if (TypeFlags.isStore()) + return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic); /// Should not happen return nullptr; Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -3904,8 +3904,12 @@ llvm::Value *vectorWrapScalar16(llvm::Value *Op); llvm::Value *EmitSVEPredicateCast(llvm::Value *Pred, llvm::VectorType *VTy); - llvm::Value *EmitSVEMaskedLoad(llvm::Type *ReturnTy, - SmallVectorImpl<llvm::Value *> &Ops); + llvm::Value *EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, + SmallVectorImpl<llvm::Value *> &Ops, + unsigned BuiltinID, bool IsZExtReturn); + llvm::Value *EmitSVEMaskedStore(const CallExpr *, + SmallVectorImpl<llvm::Value *> &Ops, + unsigned BuiltinID); llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c =================================================================== --- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c +++ 
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c @@ -1,83 +1,197 @@ -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s -D__ARM_FEATURE_SVE | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s #include <arm_sve.h> -// -// ld1 -// + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif svint8_t test_svld1_s8(svbool_t pg, const int8_t *base) { // CHECK-LABEL: test_svld1_s8 - // CHECK: <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* %{{.*}}, i32 1, <vscale x 16 x i1> %{{.*}}, <vscale x 16 x i8> zeroinitializer) - return svld1_s8(pg, base); + // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* %{{.*}}, i32 1, <vscale x 16 x i1> %{{.*}}, <vscale x 16 x i8> zeroinitializer) + // CHECK: ret <vscale x 16 x i8> %[[LOAD]] + return SVE_ACLE_FUNC(svld1,_s8,,)(pg, base); } svint16_t test_svld1_s16(svbool_t pg, const int16_t *base) { // CHECK-LABEL: test_svld1_s16 - // CHECK: <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* %{{.*}}, i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x i16> zeroinitializer) - return svld1_s16(pg, base); + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* %{{.*}}, i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x i16> zeroinitializer) + // CHECK: ret <vscale x 8 x i16> %[[LOAD]] + return 
SVE_ACLE_FUNC(svld1,_s16,,)(pg, base); } svint32_t test_svld1_s32(svbool_t pg, const int32_t *base) { // CHECK-LABEL: test_svld1_s32 - // CHECK: <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i32> zeroinitializer) - return svld1_s32(pg, base); + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i32> zeroinitializer) + // CHECK: ret <vscale x 4 x i32> %[[LOAD]] + return SVE_ACLE_FUNC(svld1,_s32,,)(pg, base); } svint64_t test_svld1_s64(svbool_t pg, const int64_t *base) { // CHECK-LABEL: test_svld1_s64 - // CHECK: <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i64> zeroinitializer) - return svld1_s64(pg, base); + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i64> zeroinitializer) + // CHECK: ret <vscale x 2 x i64> %[[LOAD]] + return SVE_ACLE_FUNC(svld1,_s64,,)(pg, base); } svuint8_t test_svld1_u8(svbool_t pg, const uint8_t *base) { // CHECK-LABEL: test_svld1_u8 - // CHECK: <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* %{{.*}}, i32 1, <vscale x 16 x i1> %{{.*}}, <vscale x 16 x i8> zeroinitializer) - return svld1_u8(pg, base); + // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* %{{.*}}, i32 1, <vscale x 16 x i1> %{{.*}}, <vscale x 16 x i8> zeroinitializer) + // CHECK: ret <vscale x 16 x i8> %[[LOAD]] + return SVE_ACLE_FUNC(svld1,_u8,,)(pg, base); } svuint16_t test_svld1_u16(svbool_t pg, const uint16_t *base) { // CHECK-LABEL: test_svld1_u16 - // CHECK: <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* %{{.*}}, i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x i16> 
zeroinitializer) - return svld1_u16(pg, base); + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* %{{.*}}, i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x i16> zeroinitializer) + // CHECK: ret <vscale x 8 x i16> %[[LOAD]] + return SVE_ACLE_FUNC(svld1,_u16,,)(pg, base); } svuint32_t test_svld1_u32(svbool_t pg, const uint32_t *base) { // CHECK-LABEL: test_svld1_u32 - // CHECK: <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i32> zeroinitializer) - return svld1_u32(pg, base); + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i32> zeroinitializer) + // CHECK: ret <vscale x 4 x i32> %[[LOAD]] + return SVE_ACLE_FUNC(svld1,_u32,,)(pg, base); } svuint64_t test_svld1_u64(svbool_t pg, const uint64_t *base) { // CHECK-LABEL: test_svld1_u64 - // CHECK: <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i64> zeroinitializer) - return svld1_u64(pg, base); + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i64> zeroinitializer) + // CHECK: ret <vscale x 2 x i64> %[[LOAD]] + return SVE_ACLE_FUNC(svld1,_u64,,)(pg, base); } svfloat16_t test_svld1_f16(svbool_t pg, const float16_t *base) { // CHECK-LABEL: test_svld1_f16 - // CHECK: <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* %{{.*}}, i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x half> zeroinitializer) - return svld1_f16(pg, base); + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* %{{.*}}, i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x half> zeroinitializer) + // CHECK: ret <vscale x 8 x half> %[[LOAD]] 
+ return SVE_ACLE_FUNC(svld1,_f16,,)(pg, base); } svfloat32_t test_svld1_f32(svbool_t pg, const float32_t *base) { // CHECK-LABEL: test_svld1_f32 - // CHECK: <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x float> zeroinitializer) - return svld1_f32(pg, base); + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x float> zeroinitializer) + // CHECK: ret <vscale x 4 x float> %[[LOAD]] + return SVE_ACLE_FUNC(svld1,_f32,,)(pg, base); } svfloat64_t test_svld1_f64(svbool_t pg, const float64_t *base) { // CHECK-LABEL: test_svld1_f64 - // CHECK: <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x double> zeroinitializer) - return svld1_f64(pg, base); + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x double> zeroinitializer) + // CHECK: ret <vscale x 2 x double> %[[LOAD]] + return SVE_ACLE_FUNC(svld1,_f64,,)(pg, base); +} +svint8_t test_svld1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1_vnum_s8 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* %[[GEP]], i32 1, <vscale x 16 x i1> %{{.*}}, <vscale x 16 x i8> zeroinitializer) + // CHECK: ret <vscale x 16 x i8> %[[LOAD]] + return SVE_ACLE_FUNC(svld1_vnum,_s8,,)(pg, base, vnum); +} + +svint16_t test_svld1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1_vnum_s16 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call 
<vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* %[[GEP]], i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x i16> zeroinitializer) + // CHECK: ret <vscale x 8 x i16> %[[LOAD]] + return SVE_ACLE_FUNC(svld1_vnum,_s16,,)(pg, base, vnum); +} + +svint32_t test_svld1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1_vnum_s32 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %[[GEP]], i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i32> zeroinitializer) + // CHECK: ret <vscale x 4 x i32> %[[LOAD]] + return SVE_ACLE_FUNC(svld1_vnum,_s32,,)(pg, base, vnum); +} + +svint64_t test_svld1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1_vnum_s64 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* %[[GEP]], i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i64> zeroinitializer) + // CHECK: ret <vscale x 2 x i64> %[[LOAD]] + return SVE_ACLE_FUNC(svld1_vnum,_s64,,)(pg, base, vnum); +} + +svuint8_t test_svld1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1_vnum_u8 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* %[[GEP]], i32 1, <vscale x 16 x i1> %{{.*}}, <vscale x 16 x i8> zeroinitializer) + // CHECK: ret <vscale x 16 x i8> %[[LOAD]] + return SVE_ACLE_FUNC(svld1_vnum,_u8,,)(pg, base, vnum); +} + +svuint16_t test_svld1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1_vnum_u16 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 
8 x i16>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* %[[GEP]], i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x i16> zeroinitializer) + // CHECK: ret <vscale x 8 x i16> %[[LOAD]] + return SVE_ACLE_FUNC(svld1_vnum,_u16,,)(pg, base, vnum); +} + +svuint32_t test_svld1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1_vnum_u32 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %[[GEP]], i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i32> zeroinitializer) + // CHECK: ret <vscale x 4 x i32> %[[LOAD]] + return SVE_ACLE_FUNC(svld1_vnum,_u32,,)(pg, base, vnum); +} + +svuint64_t test_svld1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1_vnum_u64 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* %[[GEP]], i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i64> zeroinitializer) + // CHECK: ret <vscale x 2 x i64> %[[LOAD]] + return SVE_ACLE_FUNC(svld1_vnum,_u64,,)(pg, base, vnum); +} + +svfloat16_t test_svld1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1_vnum_f16 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* %[[GEP]], i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x half> zeroinitializer) + // CHECK: ret <vscale x 8 x half> %[[LOAD]] + return SVE_ACLE_FUNC(svld1_vnum,_f16,,)(pg, base, vnum); +} + +svfloat32_t test_svld1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) +{ + // CHECK-LABEL: 
test_svld1_vnum_f32 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* %[[GEP]], i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x float> zeroinitializer) + // CHECK: ret <vscale x 4 x float> %[[LOAD]] + return SVE_ACLE_FUNC(svld1_vnum,_f32,,)(pg, base, vnum); +} + +svfloat64_t test_svld1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1_vnum_f64 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* %[[GEP]], i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x double> zeroinitializer) + // CHECK: ret <vscale x 2 x double> %[[LOAD]] + return SVE_ACLE_FUNC(svld1_vnum,_f64,,)(pg, base, vnum); } Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1_shortform.c =================================================================== --- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1_shortform.c +++ /dev/null @@ -1,83 +0,0 @@ -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s -D__ARM_FEATURE_SVE | FileCheck %s - -#include <arm_sve.h> -// -// ld1 -// - -svint8_t test_svld1_s8(svbool_t pg, const int8_t *base) -{ - // CHECK-LABEL: test_svld1_s8 - // CHECK: <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* %{{.*}}, i32 1, <vscale x 16 x i1> %{{.*}}, <vscale x 16 x i8> zeroinitializer) - return svld1(pg, base); -} - -svint16_t test_svld1_s16(svbool_t pg, const int16_t *base) -{ - // CHECK-LABEL: test_svld1_s16 - // CHECK: <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* %{{.*}}, i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x i16> zeroinitializer) - return svld1(pg, 
base); -} - -svint32_t test_svld1_s32(svbool_t pg, const int32_t *base) -{ - // CHECK-LABEL: test_svld1_s32 - // CHECK: <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i32> zeroinitializer) - return svld1(pg, base); -} - -svint64_t test_svld1_s64(svbool_t pg, const int64_t *base) -{ - // CHECK-LABEL: test_svld1_s64 - // CHECK: <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i64> zeroinitializer) - return svld1(pg, base); -} - -svuint8_t test_svld1_u8(svbool_t pg, const uint8_t *base) -{ - // CHECK-LABEL: test_svld1_u8 - // CHECK: <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* %{{.*}}, i32 1, <vscale x 16 x i1> %{{.*}}, <vscale x 16 x i8> zeroinitializer) - return svld1(pg, base); -} - -svuint16_t test_svld1_u16(svbool_t pg, const uint16_t *base) -{ - // CHECK-LABEL: test_svld1_u16 - // CHECK: <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* %{{.*}}, i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x i16> zeroinitializer) - return svld1(pg, base); -} - -svuint32_t test_svld1_u32(svbool_t pg, const uint32_t *base) -{ - // CHECK-LABEL: test_svld1_u32 - // CHECK: <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i32> zeroinitializer) - return svld1(pg, base); -} - -svuint64_t test_svld1_u64(svbool_t pg, const uint64_t *base) -{ - // CHECK-LABEL: test_svld1_u64 - // CHECK: <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i64> zeroinitializer) - return svld1(pg, base); -} - -svfloat16_t test_svld1_f16(svbool_t pg, const float16_t *base) -{ - // CHECK-LABEL: test_svld1_f16 - // CHECK: <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* %{{.*}}, i32 1, <vscale x 8 x i1> 
%{{.*}}, <vscale x 8 x half> zeroinitializer) - return svld1(pg, base); -} - -svfloat32_t test_svld1_f32(svbool_t pg, const float32_t *base) -{ - // CHECK-LABEL: test_svld1_f32 - // CHECK: <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x float> zeroinitializer) - return svld1(pg, base); -} - -svfloat64_t test_svld1_f64(svbool_t pg, const float64_t *base) -{ - // CHECK-LABEL: test_svld1_f64 - // CHECK: <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x double> zeroinitializer) - return svld1(pg, base); -} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c @@ -0,0 +1,83 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint32_t test_svld1sh_s32(svbool_t pg, const int16_t *base) +{ + // CHECK-LABEL: test_svld1sh_s32 + // CHECK-NOT: getelementptr + // CHECK: %[[LOAD:.*]] = {{.*}} @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i16> zeroinitializer) + // CHECK: sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + return SVE_ACLE_FUNC(svld1sh_s32,,,)(pg, base); +} + +svint64_t test_svld1sh_s64(svbool_t pg, const int16_t *base) +{ + // CHECK-LABEL: test_svld1sh_s64 + // CHECK-NOT: getelementptr + // CHECK: %[[LOAD:.*]] = {{.*}} @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i16> zeroinitializer) + // CHECK: sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + return SVE_ACLE_FUNC(svld1sh_s64,,,)(pg, base); +} + +svuint32_t test_svld1sh_u32(svbool_t pg, const int16_t *base) +{ + // CHECK-LABEL: test_svld1sh_u32 + // CHECK-NOT: getelementptr + // CHECK: %[[LOAD:.*]] = {{.*}} @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i16> zeroinitializer) + // CHECK: sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + return SVE_ACLE_FUNC(svld1sh_u32,,,)(pg, base); +} + +svuint64_t test_svld1sh_u64(svbool_t pg, const int16_t *base) +{ + // CHECK-LABEL: test_svld1sh_u64 + // CHECK-NOT: getelementptr + // CHECK: %[[LOAD:.*]] = {{.*}} @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i16> zeroinitializer) + // CHECK: sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + return SVE_ACLE_FUNC(svld1sh_u64,,,)(pg, base); +} + +svint32_t test_svld1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1sh_vnum_s32 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call 
<vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* %[[GEP]], i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i16> zeroinitializer) + // CHECK: sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + return SVE_ACLE_FUNC(svld1sh_vnum_s32,,,)(pg, base, vnum); +} + +svint64_t test_svld1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1sh_vnum_s64 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* %[[GEP]], i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i16> zeroinitializer) + // CHECK: sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + return SVE_ACLE_FUNC(svld1sh_vnum_s64,,,)(pg, base, vnum); +} + +svuint32_t test_svld1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1sh_vnum_u32 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* %[[GEP]], i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i16> zeroinitializer) + // CHECK: sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + return SVE_ACLE_FUNC(svld1sh_vnum_u32,,,)(pg, base, vnum); +} + +svuint64_t test_svld1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1sh_vnum_u64 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* %[[GEP]], i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i16> zeroinitializer) + // CHECK: sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + return SVE_ACLE_FUNC(svld1sh_vnum_u64,,,)(pg, base, vnum); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c =================================================================== --- /dev/null 
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +// NOTE: every test below calls the fully-suffixed builtin directly, so no SVE_OVERLOADED_FORMS run is needed. + +svint64_t test_svld1sw_s64(svbool_t pg, const int32_t *base) +{ + // CHECK-LABEL: test_svld1sw_s64 + // CHECK-NOT: getelementptr + // CHECK: %[[LOAD:.*]] = {{.*}} @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i32> zeroinitializer) + // CHECK: sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + return svld1sw_s64(pg, base); +} + +svuint64_t test_svld1sw_u64(svbool_t pg, const int32_t *base) +{ + // CHECK-LABEL: test_svld1sw_u64 + // CHECK-NOT: getelementptr + // CHECK: %[[LOAD:.*]] = {{.*}} @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i32> zeroinitializer) + // CHECK: sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + return svld1sw_u64(pg, base); +} + +svint64_t test_svld1sw_vnum_s64(svbool_t pg, const int32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1sw_vnum_s64 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %[[GEP]], i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i32> zeroinitializer) + // CHECK: sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> 
+ return svld1sw_vnum_s64(pg, base, vnum); +} + +svuint64_t test_svld1sw_vnum_u64(svbool_t pg, const int32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1sw_vnum_u64 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %[[GEP]], i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i32> zeroinitializer) + // CHECK: sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + return svld1sw_vnum_u64(pg, base, vnum); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c @@ -0,0 +1,113 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +// NOTE: every test below calls the fully-suffixed builtin directly, so no SVE_OVERLOADED_FORMS run is needed. +
+svint16_t test_svld1ub_s16(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svld1ub_s16 + // CHECK-NOT: getelementptr + // CHECK: %[[LOAD:.*]] = {{.*}} @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* %{{.*}}, i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x i8> zeroinitializer) + // CHECK: zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16> + return svld1ub_s16(pg, base); +} + +svint32_t test_svld1ub_s32(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svld1ub_s32 + // CHECK-NOT: getelementptr + // CHECK: %[[LOAD:.*]] = {{.*}} @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i8> zeroinitializer) + // CHECK: zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32> + return svld1ub_s32(pg, base); +} + +svint64_t test_svld1ub_s64(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svld1ub_s64 + // CHECK-NOT: getelementptr + // CHECK: %[[LOAD:.*]] = {{.*}} @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i8> zeroinitializer) + // CHECK: zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64> + return svld1ub_s64(pg, base); +} + +svuint16_t test_svld1ub_u16(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svld1ub_u16 + // CHECK-NOT: getelementptr + // CHECK: %[[LOAD:.*]] = {{.*}} @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* %{{.*}}, i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x i8> zeroinitializer) + // CHECK: zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16> + return svld1ub_u16(pg, base); +} + +svuint32_t test_svld1ub_u32(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svld1ub_u32 + // CHECK-NOT: getelementptr + // CHECK: %[[LOAD:.*]] = {{.*}} @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x 
i8> zeroinitializer) + // CHECK: zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32> + return svld1ub_u32(pg, base); +} + +svuint64_t test_svld1ub_u64(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svld1ub_u64 + // CHECK-NOT: getelementptr + // CHECK: %[[LOAD:.*]] = {{.*}} @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i8> zeroinitializer) + // CHECK: zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64> + return svld1ub_u64(pg, base); +} + +svint16_t test_svld1ub_vnum_s16(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1ub_vnum_s16 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* %[[GEP]], i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x i8> zeroinitializer) + // CHECK: zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16> + return svld1ub_vnum_s16(pg, base, vnum); +} + +svint32_t test_svld1ub_vnum_s32(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1ub_vnum_s32 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* %[[GEP]], i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i8> zeroinitializer) + // CHECK: zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32> + return svld1ub_vnum_s32(pg, base, vnum); +} + +svint64_t test_svld1ub_vnum_s64(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1ub_vnum_s64 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* %[[GEP]], i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i8> zeroinitializer) + // CHECK: zext <vscale x 2 x i8> 
%[[LOAD]] to <vscale x 2 x i64> + return svld1ub_vnum_s64(pg, base, vnum); +} + +svuint16_t test_svld1ub_vnum_u16(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1ub_vnum_u16 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* %[[GEP]], i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x i8> zeroinitializer) + // CHECK: zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16> + return svld1ub_vnum_u16(pg, base, vnum); +} + +svuint32_t test_svld1ub_vnum_u32(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1ub_vnum_u32 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* %[[GEP]], i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i8> zeroinitializer) + // CHECK: zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32> + return svld1ub_vnum_u32(pg, base, vnum); +} + +svuint64_t test_svld1ub_vnum_u64(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1ub_vnum_u64 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* %[[GEP]], i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i8> zeroinitializer) + // CHECK: zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64> + return svld1ub_vnum_u64(pg, base, vnum); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c @@ -0,0 +1,77 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 
-Werror -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +// NOTE: every test below calls the fully-suffixed builtin directly, so no SVE_OVERLOADED_FORMS run is needed. + +svint32_t test_svld1uh_s32(svbool_t pg, const uint16_t *base) +{ + // CHECK-LABEL: test_svld1uh_s32 + // CHECK-NOT: getelementptr + // CHECK: %[[LOAD:.*]] = {{.*}} @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i16> zeroinitializer) + // CHECK: zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + return svld1uh_s32(pg, base); +} + +svint64_t test_svld1uh_s64(svbool_t pg, const uint16_t *base) +{ + // CHECK-LABEL: test_svld1uh_s64 + // CHECK-NOT: getelementptr + // CHECK: %[[LOAD:.*]] = {{.*}} @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i16> zeroinitializer) + // CHECK: zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + return svld1uh_s64(pg, base); +} + +svuint32_t test_svld1uh_u32(svbool_t pg, const uint16_t *base) +{ + // CHECK-LABEL: test_svld1uh_u32 + // CHECK-NOT: getelementptr + // CHECK: %[[LOAD:.*]] = {{.*}} @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i16> zeroinitializer) + // CHECK: zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + return svld1uh_u32(pg, base); +} + +svuint64_t test_svld1uh_u64(svbool_t pg, const uint16_t *base) +{ + // CHECK-LABEL: test_svld1uh_u64 + // CHECK-NOT: getelementptr + // CHECK: %[[LOAD:.*]] = {{.*}} @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i16> 
zeroinitializer) + // CHECK: zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + return svld1uh_u64(pg, base); +} + +svint32_t test_svld1uh_vnum_s32(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1uh_vnum_s32 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* %[[GEP]], i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i16> zeroinitializer) + // CHECK: zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + return svld1uh_vnum_s32(pg, base, vnum); +} + +svint64_t test_svld1uh_vnum_s64(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1uh_vnum_s64 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* %[[GEP]], i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i16> zeroinitializer) + // CHECK: zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + return svld1uh_vnum_s64(pg, base, vnum); +} + +svuint32_t test_svld1uh_vnum_u32(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1uh_vnum_u32 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* %[[GEP]], i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i16> zeroinitializer) + // CHECK: zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + return svld1uh_vnum_u32(pg, base, vnum); +} + +svuint64_t test_svld1uh_vnum_u64(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1uh_vnum_u64 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> 
@llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* %[[GEP]], i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i16> zeroinitializer) + // CHECK: zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + return svld1uh_vnum_u64(pg, base, vnum); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +// NOTE: every test below calls the fully-suffixed builtin directly, so no SVE_OVERLOADED_FORMS run is needed. + +svint64_t test_svld1uw_s64(svbool_t pg, const uint32_t *base) +{ + // CHECK-LABEL: test_svld1uw_s64 + // CHECK-NOT: getelementptr + // CHECK: %[[LOAD:.*]] = {{.*}} @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i32> zeroinitializer) + // CHECK: zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + return svld1uw_s64(pg, base); +} + +svuint64_t test_svld1uw_u64(svbool_t pg, const uint32_t *base) +{ + // CHECK-LABEL: test_svld1uw_u64 + // CHECK-NOT: getelementptr + // CHECK: %[[LOAD:.*]] = {{.*}} @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i32> zeroinitializer) + // CHECK: zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + return svld1uw_u64(pg, base); +} + +svint64_t test_svld1uw_vnum_s64(svbool_t pg, const uint32_t *base, int64_t 
vnum) +{ + // CHECK-LABEL: test_svld1uw_vnum_s64 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %[[GEP]], i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i32> zeroinitializer) + // CHECK: zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + return svld1uw_vnum_s64(pg, base, vnum); +} + +svuint64_t test_svld1uw_vnum_u64(svbool_t pg, const uint32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld1uw_vnum_u64 + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %{{.*}}, i64 %vnum + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %[[GEP]], i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i32> zeroinitializer) + // CHECK: zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + return svld1uw_vnum_u64(pg, base, vnum); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1.c @@ -0,0 +1,227 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svldff1_s8(svbool_t pg, const int8_t *base) +{ + // CHECK-LABEL: test_svldff1_s8 + // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base) + // CHECK: ret <vscale x 16 x i8> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1,_s8,,)(pg, base); +} + +svint16_t test_svldff1_s16(svbool_t pg, const int16_t *base) +{ + // CHECK-LABEL: test_svldff1_s16 + // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %base) + // CHECK: ret <vscale x 8 x i16> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1,_s16,,)(pg, base); +} + +svint32_t test_svldff1_s32(svbool_t pg, const int32_t *base) +{ + // CHECK-LABEL: test_svldff1_s32 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base) + // CHECK: ret <vscale x 4 x i32> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1,_s32,,)(pg, base); +} + +svint64_t test_svldff1_s64(svbool_t pg, const int64_t *base) +{ + // CHECK-LABEL: test_svldff1_s64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base) + // CHECK: ret <vscale x 2 x i64> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1,_s64,,)(pg, base); +} + +svuint8_t test_svldff1_u8(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svldff1_u8 + // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base) + // CHECK: ret <vscale x 16 
x i8> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1,_u8,,)(pg, base); +} + +svuint16_t test_svldff1_u16(svbool_t pg, const uint16_t *base) +{ + // CHECK-LABEL: test_svldff1_u16 + // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %base) + // CHECK: ret <vscale x 8 x i16> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1,_u16,,)(pg, base); +} + +svuint32_t test_svldff1_u32(svbool_t pg, const uint32_t *base) +{ + // CHECK-LABEL: test_svldff1_u32 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base) + // CHECK: ret <vscale x 4 x i32> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1,_u32,,)(pg, base); +} + +svuint64_t test_svldff1_u64(svbool_t pg, const uint64_t *base) +{ + // CHECK-LABEL: test_svldff1_u64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base) + // CHECK: ret <vscale x 2 x i64> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1,_u64,,)(pg, base); +} + +svfloat16_t test_svldff1_f16(svbool_t pg, const float16_t *base) +{ + // CHECK-LABEL: test_svldff1_f16 + // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1> %[[PG]], half* %base) + // CHECK: ret <vscale x 8 x half> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1,_f16,,)(pg, base); +} + +svfloat32_t test_svldff1_f32(svbool_t pg, const float32_t *base) +{ + // CHECK-LABEL: test_svldff1_f32 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1> %[[PG]], float* %base) + // CHECK: ret <vscale x 4 x float> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1,_f32,,)(pg, base); +} + +svfloat64_t test_svldff1_f64(svbool_t pg, const float64_t *base) +{ + // CHECK-LABEL: test_svldff1_f64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1> %[[PG]], double* %base) + // CHECK: ret <vscale x 2 x double> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1,_f64,,)(pg, base); +} + +svint8_t test_svldff1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1_vnum_s8 + // CHECK: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>* + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> %pg, i8* %[[GEP]]) + // CHECK: ret <vscale x 16 x i8> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1_vnum,_s8,,)(pg, base, vnum); +} + +svint16_t test_svldff1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1_vnum_s16 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: ret <vscale x 8 x i16> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1_vnum,_s16,,)(pg, base, vnum); +} + +svint32_t test_svldff1_vnum_s32(svbool_t pg, 
const int32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1_vnum_s32 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %[[GEP]]) + // CHECK: ret <vscale x 4 x i32> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1_vnum,_s32,,)(pg, base, vnum); +} + +svint64_t test_svldff1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1_vnum_s64 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %[[GEP]]) + // CHECK: ret <vscale x 2 x i64> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1_vnum,_s64,,)(pg, base, vnum); +} + +svuint8_t test_svldff1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1_vnum_u8 + // CHECK: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>* + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> %pg, i8* %[[GEP]]) + // CHECK: ret <vscale x 16 x i8> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1_vnum,_u8,,)(pg, base, vnum); +} + +svuint16_t test_svldff1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1_vnum_u16 + // CHECK-DAG: %[[BITCAST:.*]] = 
bitcast i16* %base to <vscale x 8 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: ret <vscale x 8 x i16> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1_vnum,_u16,,)(pg, base, vnum); +} + +svuint32_t test_svldff1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1_vnum_u32 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %[[GEP]]) + // CHECK: ret <vscale x 4 x i32> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1_vnum,_u32,,)(pg, base, vnum); +} + +svuint64_t test_svldff1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1_vnum_u64 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %[[GEP]]) + // CHECK: ret <vscale x 2 x i64> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1_vnum,_u64,,)(pg, base, vnum); +} + +svfloat16_t test_svldff1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1_vnum_f16 + // 
CHECK-DAG: %[[BITCAST:.*]] = bitcast half* %base to <vscale x 8 x half>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1> %[[PG]], half* %[[GEP]]) + // CHECK: ret <vscale x 8 x half> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1_vnum,_f16,,)(pg, base, vnum); +} + +svfloat32_t test_svldff1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1_vnum_f32 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast float* %base to <vscale x 4 x float>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1> %[[PG]], float* %[[GEP]]) + // CHECK: ret <vscale x 4 x float> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1_vnum,_f32,,)(pg, base, vnum); +} + +svfloat64_t test_svldff1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1_vnum_f64 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast double* %base to <vscale x 2 x double>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1> %[[PG]], double* %[[GEP]]) + // CHECK: ret <vscale x 2 x double> %[[LOAD]] + return SVE_ACLE_FUNC(svldff1_vnum,_f64,,)(pg, base, vnum); +} Index: 
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sh.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sh.c @@ -0,0 +1,91 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +svint32_t test_svldff1sh_s32(svbool_t pg, const int16_t *base) +{ + // CHECK-LABEL: test_svldff1sh_s32 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base) + // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[SEXT]] + return svldff1sh_s32(pg, base); +} + +svint64_t test_svldff1sh_s64(svbool_t pg, const int16_t *base) +{ + // CHECK-LABEL: test_svldff1sh_s64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base) + // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[SEXT]] + return svldff1sh_s64(pg, base); +} + +svuint32_t test_svldff1sh_u32(svbool_t pg, const int16_t *base) +{ + // CHECK-LABEL: test_svldff1sh_u32 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base) + // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[SEXT]] + return svldff1sh_u32(pg, base); +} + +svuint64_t 
test_svldff1sh_u64(svbool_t pg, const int16_t *base) +{ + // CHECK-LABEL: test_svldff1sh_u64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base) + // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[SEXT]] + return svldff1sh_u64(pg, base); +} + +svint32_t test_svldff1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1sh_vnum_s32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 4 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[SEXT]] + return svldff1sh_vnum_s32(pg, base, vnum); +} + +svint64_t test_svldff1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1sh_vnum_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 2 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[SEXT]] + return svldff1sh_vnum_s64(pg, base, vnum); +} + +svuint32_t 
test_svldff1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1sh_vnum_u32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 4 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[SEXT]] + return svldff1sh_vnum_u32(pg, base, vnum); +} + +svuint64_t test_svldff1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1sh_vnum_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 2 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[SEXT]] + return svldff1sh_vnum_u64(pg, base, vnum); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sw.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sw.c @@ -0,0 +1,47 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +svint64_t test_svldff1sw_s64(svbool_t pg, const int32_t *base) +{ + // CHECK-LABEL: 
test_svldff1sw_s64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base) + // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[SEXT]] + return svldff1sw_s64(pg, base); +} + +svuint64_t test_svldff1sw_u64(svbool_t pg, const int32_t *base) +{ + // CHECK-LABEL: test_svldff1sw_u64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base) + // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[SEXT]] + return svldff1sw_u64(pg, base); +} + +svint64_t test_svldff1sw_vnum_s64(svbool_t pg, const int32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1sw_vnum_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 2 x i32>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %[[GEP]]) + // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[SEXT]] + return svldff1sw_vnum_s64(pg, base, vnum); +} + +svuint64_t test_svldff1sw_vnum_u64(svbool_t pg, const int32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1sw_vnum_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 2 x 
i32>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %[[GEP]]) + // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[SEXT]] + return svldff1sw_vnum_u64(pg, base, vnum); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1ub.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1ub.c @@ -0,0 +1,135 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +svint16_t test_svldff1ub_s16(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svldff1ub_s16 + // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16> + // CHECK: ret <vscale x 8 x i16> %[[ZEXT]] + return svldff1ub_s16(pg, base); +} + +svint32_t test_svldff1ub_s32(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svldff1ub_s32 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[ZEXT]] + return svldff1ub_s32(pg, base); +} + +svint64_t test_svldff1ub_s64(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svldff1ub_s64 + // CHECK: 
%[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldff1ub_s64(pg, base); +} + +svuint16_t test_svldff1ub_u16(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svldff1ub_u16 + // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16> + // CHECK: ret <vscale x 8 x i16> %[[ZEXT]] + return svldff1ub_u16(pg, base); +} + +svuint32_t test_svldff1ub_u32(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svldff1ub_u32 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[ZEXT]] + return svldff1ub_u32(pg, base); +} + +svuint64_t test_svldff1ub_u64(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svldff1ub_u64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldff1ub_u64(pg, base); +} + +svint16_t test_svldff1ub_vnum_s16(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + 
// CHECK-LABEL: test_svldff1ub_vnum_s16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 8 x i8>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16> + // CHECK: ret <vscale x 8 x i16> %[[ZEXT]] + return svldff1ub_vnum_s16(pg, base, vnum); +} + +svint32_t test_svldff1ub_vnum_s32(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1ub_vnum_s32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 4 x i8>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[ZEXT]] + return svldff1ub_vnum_s32(pg, base, vnum); +} + +svint64_t test_svldff1ub_vnum_s64(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1ub_vnum_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 2 x i8>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] 
to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldff1ub_vnum_s64(pg, base, vnum); +} + +svuint16_t test_svldff1ub_vnum_u16(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1ub_vnum_u16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 8 x i8>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16> + // CHECK: ret <vscale x 8 x i16> %[[ZEXT]] + return svldff1ub_vnum_u16(pg, base, vnum); +} + +svuint32_t test_svldff1ub_vnum_u32(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1ub_vnum_u32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 4 x i8>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[ZEXT]] + return svldff1ub_vnum_u32(pg, base, vnum); +} + +svuint64_t test_svldff1ub_vnum_u64(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1ub_vnum_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 2 x i8>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* 
%[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldff1ub_vnum_u64(pg, base, vnum); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uh.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uh.c @@ -0,0 +1,91 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +svint32_t test_svldff1uh_s32(svbool_t pg, const uint16_t *base) +{ + // CHECK-LABEL: test_svldff1uh_s32 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[ZEXT]] + return svldff1uh_s32(pg, base); +} + +svint64_t test_svldff1uh_s64(svbool_t pg, const uint16_t *base) +{ + // CHECK-LABEL: test_svldff1uh_s64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldff1uh_s64(pg, base); +} + +svuint32_t test_svldff1uh_u32(svbool_t pg, const uint16_t *base) +{ + // CHECK-LABEL: test_svldff1uh_u32 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 
16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[ZEXT]] + return svldff1uh_u32(pg, base); +} + +svuint64_t test_svldff1uh_u64(svbool_t pg, const uint16_t *base) +{ + // CHECK-LABEL: test_svldff1uh_u64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldff1uh_u64(pg, base); +} + +svint32_t test_svldff1uh_vnum_s32(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1uh_vnum_s32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 4 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[ZEXT]] + return svldff1uh_vnum_s32(pg, base, vnum); +} + +svint64_t test_svldff1uh_vnum_s64(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1uh_vnum_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 2 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BITCAST]], i64 %vnum, i64 0 
+ // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldff1uh_vnum_s64(pg, base, vnum); +} + +svuint32_t test_svldff1uh_vnum_u32(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1uh_vnum_u32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 4 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[ZEXT]] + return svldff1uh_vnum_u32(pg, base, vnum); +} + +svuint64_t test_svldff1uh_vnum_u64(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1uh_vnum_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 2 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldff1uh_vnum_u64(pg, base, vnum); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uw.c =================================================================== --- /dev/null +++ 
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uw.c @@ -0,0 +1,47 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +svint64_t test_svldff1uw_s64(svbool_t pg, const uint32_t *base) +{ + // CHECK-LABEL: test_svldff1uw_s64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldff1uw_s64(pg, base); +} + +svuint64_t test_svldff1uw_u64(svbool_t pg, const uint32_t *base) +{ + // CHECK-LABEL: test_svldff1uw_u64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldff1uw_u64(pg, base); +} + +svint64_t test_svldff1uw_vnum_s64(svbool_t pg, const uint32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1uw_vnum_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 2 x i32>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> 
%[[ZEXT]] + return svldff1uw_vnum_s64(pg, base, vnum); +} + +svuint64_t test_svldff1uw_vnum_u64(svbool_t pg, const uint32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldff1uw_vnum_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 2 x i32>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldff1uw_vnum_u64(pg, base, vnum); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1.c @@ -0,0 +1,227 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svldnf1_s8(svbool_t pg, const int8_t *base) +{ + // CHECK-LABEL: test_svldnf1_s8 + // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base) + // CHECK: ret <vscale x 16 x i8> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1,_s8,,)(pg, base); +} + +svint16_t test_svldnf1_s16(svbool_t pg, const int16_t *base) +{ + // CHECK-LABEL: test_svldnf1_s16 + // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %base) + // CHECK: ret <vscale x 8 x i16> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1,_s16,,)(pg, base); +} + +svint32_t test_svldnf1_s32(svbool_t pg, const int32_t *base) +{ + // CHECK-LABEL: test_svldnf1_s32 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base) + // CHECK: ret <vscale x 4 x i32> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1,_s32,,)(pg, base); +} + +svint64_t test_svldnf1_s64(svbool_t pg, const int64_t *base) +{ + // CHECK-LABEL: test_svldnf1_s64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base) + // CHECK: ret <vscale x 2 x i64> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1,_s64,,)(pg, base); +} + +svuint8_t test_svldnf1_u8(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svldnf1_u8 + // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base) + // CHECK: ret <vscale x 16 
x i8> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1,_u8,,)(pg, base); +} + +svuint16_t test_svldnf1_u16(svbool_t pg, const uint16_t *base) +{ + // CHECK-LABEL: test_svldnf1_u16 + // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %base) + // CHECK: ret <vscale x 8 x i16> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1,_u16,,)(pg, base); +} + +svuint32_t test_svldnf1_u32(svbool_t pg, const uint32_t *base) +{ + // CHECK-LABEL: test_svldnf1_u32 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base) + // CHECK: ret <vscale x 4 x i32> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1,_u32,,)(pg, base); +} + +svuint64_t test_svldnf1_u64(svbool_t pg, const uint64_t *base) +{ + // CHECK-LABEL: test_svldnf1_u64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base) + // CHECK: ret <vscale x 2 x i64> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1,_u64,,)(pg, base); +} + +svfloat16_t test_svldnf1_f16(svbool_t pg, const float16_t *base) +{ + // CHECK-LABEL: test_svldnf1_f16 + // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> %[[PG]], half* %base) + // CHECK: ret <vscale x 8 x half> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1,_f16,,)(pg, base); +} + +svfloat32_t test_svldnf1_f32(svbool_t pg, const float32_t *base) +{ + // CHECK-LABEL: test_svldnf1_f32 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> %[[PG]], float* %base) + // CHECK: ret <vscale x 4 x float> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1,_f32,,)(pg, base); +} + +svfloat64_t test_svldnf1_f64(svbool_t pg, const float64_t *base) +{ + // CHECK-LABEL: test_svldnf1_f64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> %[[PG]], double* %base) + // CHECK: ret <vscale x 2 x double> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1,_f64,,)(pg, base); +} + +svint8_t test_svldnf1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1_vnum_s8 + // CHECK: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>* + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, i8* %[[GEP]]) + // CHECK: ret <vscale x 16 x i8> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1_vnum,_s8,,)(pg, base, vnum); +} + +svint16_t test_svldnf1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1_vnum_s16 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: ret <vscale x 8 x i16> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1_vnum,_s16,,)(pg, base, vnum); +} + +svint32_t test_svldnf1_vnum_s32(svbool_t pg, 
const int32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1_vnum_s32 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %[[GEP]]) + // CHECK: ret <vscale x 4 x i32> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1_vnum,_s32,,)(pg, base, vnum); +} + +svint64_t test_svldnf1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1_vnum_s64 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %[[GEP]]) + // CHECK: ret <vscale x 2 x i64> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1_vnum,_s64,,)(pg, base, vnum); +} + +svuint8_t test_svldnf1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1_vnum_u8 + // CHECK: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>* + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, i8* %[[GEP]]) + // CHECK: ret <vscale x 16 x i8> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1_vnum,_u8,,)(pg, base, vnum); +} + +svuint16_t test_svldnf1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1_vnum_u16 + // CHECK-DAG: %[[BITCAST:.*]] = 
bitcast i16* %base to <vscale x 8 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: ret <vscale x 8 x i16> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1_vnum,_u16,,)(pg, base, vnum); +} + +svuint32_t test_svldnf1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1_vnum_u32 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %[[GEP]]) + // CHECK: ret <vscale x 4 x i32> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1_vnum,_u32,,)(pg, base, vnum); +} + +svuint64_t test_svldnf1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1_vnum_u64 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %[[GEP]]) + // CHECK: ret <vscale x 2 x i64> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1_vnum,_u64,,)(pg, base, vnum); +} + +svfloat16_t test_svldnf1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1_vnum_f16 + // 
CHECK-DAG: %[[BITCAST:.*]] = bitcast half* %base to <vscale x 8 x half>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> %[[PG]], half* %[[GEP]]) + // CHECK: ret <vscale x 8 x half> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1_vnum,_f16,,)(pg, base, vnum); +} + +svfloat32_t test_svldnf1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1_vnum_f32 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast float* %base to <vscale x 4 x float>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> %[[PG]], float* %[[GEP]]) + // CHECK: ret <vscale x 4 x float> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1_vnum,_f32,,)(pg, base, vnum); +} + +svfloat64_t test_svldnf1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1_vnum_f64 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast double* %base to <vscale x 2 x double>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> %[[PG]], double* %[[GEP]]) + // CHECK: ret <vscale x 2 x double> %[[LOAD]] + return SVE_ACLE_FUNC(svldnf1_vnum,_f64,,)(pg, base, vnum); +} Index: 
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sh.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sh.c @@ -0,0 +1,91 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +svint32_t test_svldnf1sh_s32(svbool_t pg, const int16_t *base) +{ + // CHECK-LABEL: test_svldnf1sh_s32 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base) + // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[SEXT]] + return svldnf1sh_s32(pg, base); +} + +svint64_t test_svldnf1sh_s64(svbool_t pg, const int16_t *base) +{ + // CHECK-LABEL: test_svldnf1sh_s64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base) + // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[SEXT]] + return svldnf1sh_s64(pg, base); +} + +svuint32_t test_svldnf1sh_u32(svbool_t pg, const int16_t *base) +{ + // CHECK-LABEL: test_svldnf1sh_u32 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base) + // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[SEXT]] + return svldnf1sh_u32(pg, base); +} + +svuint64_t 
test_svldnf1sh_u64(svbool_t pg, const int16_t *base) +{ + // CHECK-LABEL: test_svldnf1sh_u64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base) + // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[SEXT]] + return svldnf1sh_u64(pg, base); +} + +svint32_t test_svldnf1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1sh_vnum_s32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 4 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[SEXT]] + return svldnf1sh_vnum_s32(pg, base, vnum); +} + +svint64_t test_svldnf1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1sh_vnum_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 2 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[SEXT]] + return svldnf1sh_vnum_s64(pg, base, vnum); +} + +svuint32_t 
test_svldnf1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1sh_vnum_u32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 4 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[SEXT]] + return svldnf1sh_vnum_u32(pg, base, vnum); +} + +svuint64_t test_svldnf1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1sh_vnum_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 2 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[SEXT]] + return svldnf1sh_vnum_u64(pg, base, vnum); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sw.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sw.c @@ -0,0 +1,47 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +svint64_t test_svldnf1sw_s64(svbool_t pg, const int32_t *base) +{ + // CHECK-LABEL: 
test_svldnf1sw_s64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base) + // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[SEXT]] + return svldnf1sw_s64(pg, base); +} + +svuint64_t test_svldnf1sw_u64(svbool_t pg, const int32_t *base) +{ + // CHECK-LABEL: test_svldnf1sw_u64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base) + // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[SEXT]] + return svldnf1sw_u64(pg, base); +} + +svint64_t test_svldnf1sw_vnum_s64(svbool_t pg, const int32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1sw_vnum_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 2 x i32>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %[[GEP]]) + // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[SEXT]] + return svldnf1sw_vnum_s64(pg, base, vnum); +} + +svuint64_t test_svldnf1sw_vnum_u64(svbool_t pg, const int32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1sw_vnum_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 2 x 
i32>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %[[GEP]]) + // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[SEXT]] + return svldnf1sw_vnum_u64(pg, base, vnum); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1ub.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1ub.c @@ -0,0 +1,135 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +svint16_t test_svldnf1ub_s16(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svldnf1ub_s16 + // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16> + // CHECK: ret <vscale x 8 x i16> %[[ZEXT]] + return svldnf1ub_s16(pg, base); +} + +svint32_t test_svldnf1ub_s32(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svldnf1ub_s32 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[ZEXT]] + return svldnf1ub_s32(pg, base); +} + +svint64_t test_svldnf1ub_s64(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svldnf1ub_s64 + // CHECK: 
%[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldnf1ub_s64(pg, base); +} + +svuint16_t test_svldnf1ub_u16(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svldnf1ub_u16 + // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16> + // CHECK: ret <vscale x 8 x i16> %[[ZEXT]] + return svldnf1ub_u16(pg, base); +} + +svuint32_t test_svldnf1ub_u32(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svldnf1ub_u32 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[ZEXT]] + return svldnf1ub_u32(pg, base); +} + +svuint64_t test_svldnf1ub_u64(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svldnf1ub_u64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldnf1ub_u64(pg, base); +} + +svint16_t test_svldnf1ub_vnum_s16(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + 
// CHECK-LABEL: test_svldnf1ub_vnum_s16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 8 x i8>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16> + // CHECK: ret <vscale x 8 x i16> %[[ZEXT]] + return svldnf1ub_vnum_s16(pg, base, vnum); +} + +svint32_t test_svldnf1ub_vnum_s32(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1ub_vnum_s32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 4 x i8>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[ZEXT]] + return svldnf1ub_vnum_s32(pg, base, vnum); +} + +svint64_t test_svldnf1ub_vnum_s64(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1ub_vnum_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 2 x i8>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] 
to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldnf1ub_vnum_s64(pg, base, vnum); +} + +svuint16_t test_svldnf1ub_vnum_u16(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1ub_vnum_u16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 8 x i8>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16> + // CHECK: ret <vscale x 8 x i16> %[[ZEXT]] + return svldnf1ub_vnum_u16(pg, base, vnum); +} + +svuint32_t test_svldnf1ub_vnum_u32(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1ub_vnum_u32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 4 x i8>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[ZEXT]] + return svldnf1ub_vnum_u32(pg, base, vnum); +} + +svuint64_t test_svldnf1ub_vnum_u64(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1ub_vnum_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 2 x i8>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* 
%[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldnf1ub_vnum_u64(pg, base, vnum); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uh.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uh.c @@ -0,0 +1,91 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +svint32_t test_svldnf1uh_s32(svbool_t pg, const uint16_t *base) +{ + // CHECK-LABEL: test_svldnf1uh_s32 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[ZEXT]] + return svldnf1uh_s32(pg, base); +} + +svint64_t test_svldnf1uh_s64(svbool_t pg, const uint16_t *base) +{ + // CHECK-LABEL: test_svldnf1uh_s64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldnf1uh_s64(pg, base); +} + +svuint32_t test_svldnf1uh_u32(svbool_t pg, const uint16_t *base) +{ + // CHECK-LABEL: test_svldnf1uh_u32 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 
16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[ZEXT]] + return svldnf1uh_u32(pg, base); +} + +svuint64_t test_svldnf1uh_u64(svbool_t pg, const uint16_t *base) +{ + // CHECK-LABEL: test_svldnf1uh_u64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldnf1uh_u64(pg, base); +} + +svint32_t test_svldnf1uh_vnum_s32(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1uh_vnum_s32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 4 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[ZEXT]] + return svldnf1uh_vnum_s32(pg, base, vnum); +} + +svint64_t test_svldnf1uh_vnum_s64(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1uh_vnum_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 2 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BITCAST]], i64 %vnum, i64 0 
+ // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldnf1uh_vnum_s64(pg, base, vnum); +} + +svuint32_t test_svldnf1uh_vnum_u32(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1uh_vnum_u32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 4 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32> + // CHECK: ret <vscale x 4 x i32> %[[ZEXT]] + return svldnf1uh_vnum_u32(pg, base, vnum); +} + +svuint64_t test_svldnf1uh_vnum_u64(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1uh_vnum_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 2 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldnf1uh_vnum_u64(pg, base, vnum); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uw.c =================================================================== --- /dev/null +++ 
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uw.c @@ -0,0 +1,47 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +svint64_t test_svldnf1uw_s64(svbool_t pg, const uint32_t *base) +{ + // CHECK-LABEL: test_svldnf1uw_s64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldnf1uw_s64(pg, base); +} + +svuint64_t test_svldnf1uw_u64(svbool_t pg, const uint32_t *base) +{ + // CHECK-LABEL: test_svldnf1uw_u64 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldnf1uw_u64(pg, base); +} + +svint64_t test_svldnf1uw_vnum_s64(svbool_t pg, const uint32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1uw_vnum_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 2 x i32>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> 
%[[ZEXT]] + return svldnf1uw_vnum_s64(pg, base, vnum); +} + +svuint64_t test_svldnf1uw_vnum_u64(svbool_t pg, const uint32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnf1uw_vnum_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 2 x i32>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %[[GEP]]) + // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64> + // CHECK: ret <vscale x 2 x i64> %[[ZEXT]] + return svldnf1uw_vnum_u64(pg, base, vnum); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1.c @@ -0,0 +1,227 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svldnt1_s8(svbool_t pg, const int8_t *base) +{ + // CHECK-LABEL: test_svldnt1_s8 + // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base) + // CHECK: ret <vscale x 16 x i8> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1,_s8,,)(pg, base); +} + +svint16_t test_svldnt1_s16(svbool_t pg, const int16_t *base) +{ + // CHECK-LABEL: test_svldnt1_s16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %base) + // CHECK: ret <vscale x 8 x i16> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1,_s16,,)(pg, base); +} + +svint32_t test_svldnt1_s32(svbool_t pg, const int32_t *base) +{ + // CHECK-LABEL: test_svldnt1_s32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base) + // CHECK: ret <vscale x 4 x i32> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1,_s32,,)(pg, base); +} + +svint64_t test_svldnt1_s64(svbool_t pg, const int64_t *base) +{ + // CHECK-LABEL: test_svldnt1_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base) + // CHECK: ret <vscale x 2 x i64> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1,_s64,,)(pg, base); +} + +svuint8_t test_svldnt1_u8(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svldnt1_u8 + // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base) + // CHECK: ret 
<vscale x 16 x i8> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1,_u8,,)(pg, base); +} + +svuint16_t test_svldnt1_u16(svbool_t pg, const uint16_t *base) +{ + // CHECK-LABEL: test_svldnt1_u16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %base) + // CHECK: ret <vscale x 8 x i16> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1,_u16,,)(pg, base); +} + +svuint32_t test_svldnt1_u32(svbool_t pg, const uint32_t *base) +{ + // CHECK-LABEL: test_svldnt1_u32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base) + // CHECK: ret <vscale x 4 x i32> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1,_u32,,)(pg, base); +} + +svuint64_t test_svldnt1_u64(svbool_t pg, const uint64_t *base) +{ + // CHECK-LABEL: test_svldnt1_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base) + // CHECK: ret <vscale x 2 x i64> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1,_u64,,)(pg, base); +} + +svfloat16_t test_svldnt1_f16(svbool_t pg, const float16_t *base) +{ + // CHECK-LABEL: test_svldnt1_f16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> %[[PG]], half* %base) + // CHECK: ret <vscale x 8 x half> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1,_f16,,)(pg, base); +} + +svfloat32_t test_svldnt1_f32(svbool_t pg, const float32_t *base) +{ + // CHECK-LABEL: test_svldnt1_f32 + // CHECK-DAG: %[[PG:.*]] 
= call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> %[[PG]], float* %base) + // CHECK: ret <vscale x 4 x float> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1,_f32,,)(pg, base); +} + +svfloat64_t test_svldnt1_f64(svbool_t pg, const float64_t *base) +{ + // CHECK-LABEL: test_svldnt1_f64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> %[[PG]], double* %base) + // CHECK: ret <vscale x 2 x double> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1,_f64,,)(pg, base); +} + +svint8_t test_svldnt1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnt1_vnum_s8 + // CHECK: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>* + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %pg, i8* %[[GEP]]) + // CHECK: ret <vscale x 16 x i8> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1_vnum,_s8,,)(pg, base, vnum); +} + +svint16_t test_svldnt1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnt1_vnum_s16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: ret <vscale x 8 x i16> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1_vnum,_s16,,)(pg, base, vnum); +} + +svint32_t 
test_svldnt1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnt1_vnum_s32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %[[GEP]]) + // CHECK: ret <vscale x 4 x i32> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1_vnum,_s32,,)(pg, base, vnum); +} + +svint64_t test_svldnt1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnt1_vnum_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %[[GEP]]) + // CHECK: ret <vscale x 2 x i64> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1_vnum,_s64,,)(pg, base, vnum); +} + +svuint8_t test_svldnt1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnt1_vnum_u8 + // CHECK: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>* + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %pg, i8* %[[GEP]]) + // CHECK: ret <vscale x 16 x i8> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1_vnum,_u8,,)(pg, base, vnum); +} + +svuint16_t test_svldnt1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnt1_vnum_u16 + // 
CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK: ret <vscale x 8 x i16> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1_vnum,_u16,,)(pg, base, vnum); +} + +svuint32_t test_svldnt1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnt1_vnum_u32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %[[GEP]]) + // CHECK: ret <vscale x 4 x i32> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1_vnum,_u32,,)(pg, base, vnum); +} + +svuint64_t test_svldnt1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnt1_vnum_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %[[GEP]]) + // CHECK: ret <vscale x 2 x i64> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1_vnum,_u64,,)(pg, base, vnum); +} + +svfloat16_t test_svldnt1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) +{ + // CHECK-LABEL: 
test_svldnt1_vnum_f16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast half* %base to <vscale x 8 x half>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> %[[PG]], half* %[[GEP]]) + // CHECK: ret <vscale x 8 x half> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1_vnum,_f16,,)(pg, base, vnum); +} + +svfloat32_t test_svldnt1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnt1_vnum_f32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast float* %base to <vscale x 4 x float>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> %[[PG]], float* %[[GEP]]) + // CHECK: ret <vscale x 4 x float> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1_vnum,_f32,,)(pg, base, vnum); +} + +svfloat64_t test_svldnt1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svldnt1_vnum_f64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BITCAST:.*]] = bitcast double* %base to <vscale x 2 x double>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> %[[PG]], double* %[[GEP]]) + // CHECK: ret <vscale x 2 x double> %[[LOAD]] + return SVE_ACLE_FUNC(svldnt1_vnum,_f64,,)(pg, base, vnum); +} Index: 
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1.c @@ -0,0 +1,216 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +void test_svst1_s8(svbool_t pg, int8_t *base, svint8_t data) +{ + // CHECK-LABEL: test_svst1_s8 + // CHECK: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>* + // CHECK: @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i8>* %[[BASE]], i32 1, <vscale x 16 x i1> %pg) + return SVE_ACLE_FUNC(svst1,_s8,,)(pg, base, data); +} + +void test_svst1_s16(svbool_t pg, int16_t *base, svint16_t data) +{ + // CHECK-LABEL: test_svst1_s16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 8 x i16>* + // CHECK: @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i16>* %[[BASE]], i32 1, <vscale x 8 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1,_s16,,)(pg, base, data); +} + +void test_svst1_s32(svbool_t pg, int32_t *base, svint32_t data) +{ + // CHECK-LABEL: test_svst1_s32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 4 x i32>* 
+ // CHECK: @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1,_s32,,)(pg, base, data); +} + +void test_svst1_s64(svbool_t pg, int64_t *base, svint64_t data) +{ + // CHECK-LABEL: test_svst1_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to <vscale x 2 x i64>* + // CHECK: @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1,_s64,,)(pg, base, data); +} + +void test_svst1_u8(svbool_t pg, uint8_t *base, svuint8_t data) +{ + // CHECK-LABEL: test_svst1_u8 + // CHECK: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>* + // CHECK: @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i8>* %[[BASE]], i32 1, <vscale x 16 x i1> %pg) + return SVE_ACLE_FUNC(svst1,_u8,,)(pg, base, data); +} + +void test_svst1_u16(svbool_t pg, uint16_t *base, svuint16_t data) +{ + // CHECK-LABEL: test_svst1_u16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 8 x i16>* + // CHECK: @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i16>* %[[BASE]], i32 1, <vscale x 8 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1,_u16,,)(pg, base, data); +} + +void test_svst1_u32(svbool_t pg, uint32_t *base, svuint32_t data) +{ + // CHECK-LABEL: test_svst1_u32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 4 x i32>* + // CHECK: @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]]) + return 
SVE_ACLE_FUNC(svst1,_u32,,)(pg, base, data); +} + +void test_svst1_u64(svbool_t pg, uint64_t *base, svuint64_t data) +{ + // CHECK-LABEL: test_svst1_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to <vscale x 2 x i64>* + // CHECK: @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1,_u64,,)(pg, base, data); +} + +void test_svst1_f16(svbool_t pg, float16_t *base, svfloat16_t data) +{ + // CHECK-LABEL: test_svst1_f16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast half* %base to <vscale x 8 x half>* + // CHECK: @llvm.masked.store.nxv8f16.p0nxv8f16(<vscale x 8 x half> %data, <vscale x 8 x half>* %[[BASE]], i32 1, <vscale x 8 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1,_f16,,)(pg, base, data); +} + +void test_svst1_f32(svbool_t pg, float32_t *base, svfloat32_t data) +{ + // CHECK-LABEL: test_svst1_f32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast float* %base to <vscale x 4 x float>* + // CHECK: @llvm.masked.store.nxv4f32.p0nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x float>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1,_f32,,)(pg, base, data); +} + +void test_svst1_f64(svbool_t pg, float64_t *base, svfloat64_t data) +{ + // CHECK-LABEL: test_svst1_f64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast double* %base to <vscale x 2 x double>* + // CHECK: @llvm.masked.store.nxv2f64.p0nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double>* %[[BASE]], i32 1, <vscale x 2 x i1> 
%[[PG]]) + return SVE_ACLE_FUNC(svst1,_f64,,)(pg, base, data); +} + +void test_svst1_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data) +{ + // CHECK-LABEL: test_svst1_vnum_s8 + // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %{{.*}}, i64 %vnum + // CHECK: @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i8>* %[[GEP]], i32 1, <vscale x 16 x i1> %pg) + return SVE_ACLE_FUNC(svst1_vnum,_s8,,)(pg, base, vnum, data); +} + +void test_svst1_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16_t data) +{ + // CHECK-LABEL: test_svst1_vnum_s16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 8 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %{{.*}}, i64 %vnum + // CHECK: @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i16>* %[[GEP]], i32 1, <vscale x 8 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1_vnum,_s16,,)(pg, base, vnum, data); +} + +void test_svst1_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32_t data) +{ + // CHECK-LABEL: test_svst1_vnum_s32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 4 x i32>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %{{.*}}, i64 %vnum + // CHECK: @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1_vnum,_s32,,)(pg, base, vnum, data); +} + +void test_svst1_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64_t data) +{ + // CHECK-LABEL: test_svst1_vnum_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 
2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to <vscale x 2 x i64>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %{{.*}}, i64 %vnum + // CHECK: @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1_vnum,_s64,,)(pg, base, vnum, data); +} + +void test_svst1_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t data) +{ + // CHECK-LABEL: test_svst1_vnum_u8 + // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %{{.*}}, i64 %vnum + // CHECK: @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i8>* %[[GEP]], i32 1, <vscale x 16 x i1> %pg) + return SVE_ACLE_FUNC(svst1_vnum,_u8,,)(pg, base, vnum, data); +} + +void test_svst1_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t data) +{ + // CHECK-LABEL: test_svst1_vnum_u16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 8 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %{{.*}}, i64 %vnum + // CHECK: @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i16>* %[[GEP]], i32 1, <vscale x 8 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1_vnum,_u16,,)(pg, base, vnum, data); +} + +void test_svst1_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t data) +{ + // CHECK-LABEL: test_svst1_vnum_u32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 4 x i32>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x 
i32>* %{{.*}}, i64 %vnum + // CHECK: @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1_vnum,_u32,,)(pg, base, vnum, data); +} + +void test_svst1_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t data) +{ + // CHECK-LABEL: test_svst1_vnum_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to <vscale x 2 x i64>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %{{.*}}, i64 %vnum + // CHECK: @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1_vnum,_u64,,)(pg, base, vnum, data); +} + +void test_svst1_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16_t data) +{ + // CHECK-LABEL: test_svst1_vnum_f16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast half* %base to <vscale x 8 x half>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %{{.*}}, i64 %vnum + // CHECK: @llvm.masked.store.nxv8f16.p0nxv8f16(<vscale x 8 x half> %data, <vscale x 8 x half>* %[[GEP]], i32 1, <vscale x 8 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1_vnum,_f16,,)(pg, base, vnum, data); +} + +void test_svst1_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32_t data) +{ + // CHECK-LABEL: test_svst1_vnum_f32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast float* %base to <vscale x 4 x float>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %{{.*}}, i64 %vnum + // CHECK: @llvm.masked.store.nxv4f32.p0nxv4f32(<vscale 
x 4 x float> %data, <vscale x 4 x float>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1_vnum,_f32,,)(pg, base, vnum, data); +} + +void test_svst1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64_t data) +{ + // CHECK-LABEL: test_svst1_vnum_f64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast double* %base to <vscale x 2 x double>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %{{.*}}, i64 %vnum + // CHECK: @llvm.masked.store.nxv2f64.p0nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1_vnum,_f64,,)(pg, base, vnum, data); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1b.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1b.c @@ -0,0 +1,137 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o - -emit-llvm %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o - -emit-llvm %s | FileCheck %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +void test_svst1b_s16(svbool_t pg, int8_t *base, svint16_t data) +{ + // CHECK-LABEL: test_svst1b_s16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 8 x i8>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 8 x i16> %data to <vscale x 8 x i8> + // CHECK: @llvm.masked.store.nxv8i8.p0nxv8i8(<vscale x 8 x i8> %[[DATA]], <vscale x 8 x i8>* %[[BASE]], i32 1, <vscale x 8 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1b,_s16,,)(pg, base, data); +} + +void test_svst1b_s32(svbool_t pg, int8_t *base, svint32_t data) +{ + // CHECK-LABEL: test_svst1b_s32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 4 x i8>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8> + // CHECK: @llvm.masked.store.nxv4i8.p0nxv4i8(<vscale x 4 x i8> %[[DATA]], <vscale x 4 x i8>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1b,_s32,,)(pg, base, data); +} + +void test_svst1b_s64(svbool_t pg, int8_t *base, svint64_t data) +{ + // CHECK-LABEL: test_svst1b_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 2 x i8>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8> + // CHECK: @llvm.masked.store.nxv2i8.p0nxv2i8(<vscale x 2 x i8> %[[DATA]], <vscale x 2 x i8>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1b,_s64,,)(pg, base, data); +} + +void test_svst1b_u16(svbool_t pg, uint8_t *base, svuint16_t data) +{ + // CHECK-LABEL: test_svst1b_u16 + // CHECK-DAG: %[[PG:.*]] = call 
<vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 8 x i8>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 8 x i16> %data to <vscale x 8 x i8> + // CHECK: @llvm.masked.store.nxv8i8.p0nxv8i8(<vscale x 8 x i8> %[[DATA]], <vscale x 8 x i8>* %[[BASE]], i32 1, <vscale x 8 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1b,_u16,,)(pg, base, data); +} + +void test_svst1b_u32(svbool_t pg, uint8_t *base, svuint32_t data) +{ + // CHECK-LABEL: test_svst1b_u32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 4 x i8>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8> + // CHECK: @llvm.masked.store.nxv4i8.p0nxv4i8(<vscale x 4 x i8> %[[DATA]], <vscale x 4 x i8>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1b,_u32,,)(pg, base, data); +} + +void test_svst1b_u64(svbool_t pg, uint8_t *base, svuint64_t data) +{ + // CHECK-LABEL: test_svst1b_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 2 x i8>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8> + // CHECK: @llvm.masked.store.nxv2i8.p0nxv2i8(<vscale x 2 x i8> %[[DATA]], <vscale x 2 x i8>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1b,_u64,,)(pg, base, data); +} + +void test_svst1b_vnum_s16(svbool_t pg, int8_t *base, int64_t vnum, svint16_t data) +{ + // CHECK-LABEL: test_svst1b_vnum_s16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 8 x i8>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 8 x i16> %data to <vscale x 8 x i8> 
+ // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %[[BASE]], i64 %vnum + // CHECK: @llvm.masked.store.nxv8i8.p0nxv8i8(<vscale x 8 x i8> %[[DATA]], <vscale x 8 x i8>* %[[GEP]], i32 1, <vscale x 8 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1b_vnum,_s16,,)(pg, base, vnum, data); +} + +void test_svst1b_vnum_s32(svbool_t pg, int8_t *base, int64_t vnum, svint32_t data) +{ + // CHECK-LABEL: test_svst1b_vnum_s32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 4 x i8>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8> + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %[[BASE]], i64 %vnum + // CHECK: @llvm.masked.store.nxv4i8.p0nxv4i8(<vscale x 4 x i8> %[[DATA]], <vscale x 4 x i8>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1b_vnum,_s32,,)(pg, base, vnum, data); +} + +void test_svst1b_vnum_s64(svbool_t pg, int8_t *base, int64_t vnum, svint64_t data) +{ + // CHECK-LABEL: test_svst1b_vnum_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 2 x i8>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8> + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %[[BASE]], i64 %vnum + // CHECK: @llvm.masked.store.nxv2i8.p0nxv2i8(<vscale x 2 x i8> %[[DATA]], <vscale x 2 x i8>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1b_vnum,_s64,,)(pg, base, vnum, data); +} + +void test_svst1b_vnum_u16(svbool_t pg, uint8_t *base, int64_t vnum, svuint16_t data) +{ + // CHECK-LABEL: test_svst1b_vnum_u16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // 
CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 8 x i8>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 8 x i16> %data to <vscale x 8 x i8> + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %[[BASE]], i64 %vnum + // CHECK: @llvm.masked.store.nxv8i8.p0nxv8i8(<vscale x 8 x i8> %[[DATA]], <vscale x 8 x i8>* %[[GEP]], i32 1, <vscale x 8 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1b_vnum,_u16,,)(pg, base, vnum, data); +} + +void test_svst1b_vnum_u32(svbool_t pg, uint8_t *base, int64_t vnum, svuint32_t data) +{ + // CHECK-LABEL: test_svst1b_vnum_u32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 4 x i8>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8> + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %[[BASE]], i64 %vnum + // CHECK: @llvm.masked.store.nxv4i8.p0nxv4i8(<vscale x 4 x i8> %[[DATA]], <vscale x 4 x i8>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1b_vnum,_u32,,)(pg, base, vnum, data); +} + +void test_svst1b_vnum_u64(svbool_t pg, uint8_t *base, int64_t vnum, svuint64_t data) +{ + // CHECK-LABEL: test_svst1b_vnum_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 2 x i8>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8> + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %[[BASE]], i64 %vnum + // CHECK: @llvm.masked.store.nxv2i8.p0nxv2i8(<vscale x 2 x i8> %[[DATA]], <vscale x 2 x i8>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1b_vnum,_u64,,)(pg, base, vnum, data); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1h.c 
=================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1h.c @@ -0,0 +1,95 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o - -emit-llvm %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o - -emit-llvm %s | FileCheck %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +void test_svst1h_s32(svbool_t pg, int16_t *base, svint32_t data) +{ + // CHECK-LABEL: test_svst1h_s32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 4 x i16>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16> + // CHECK: @llvm.masked.store.nxv4i16.p0nxv4i16(<vscale x 4 x i16> %[[DATA]], <vscale x 4 x i16>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1h,_s32,,)(pg, base, data); +} + +void test_svst1h_s64(svbool_t pg, int16_t *base, svint64_t data) +{ + // CHECK-LABEL: test_svst1h_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 2 x i16>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16> + // CHECK: @llvm.masked.store.nxv2i16.p0nxv2i16(<vscale x 2 x i16> %[[DATA]], <vscale x 2 x i16>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1h,_s64,,)(pg, base, data); +} + +void test_svst1h_u32(svbool_t pg, 
uint16_t *base, svuint32_t data) +{ + // CHECK-LABEL: test_svst1h_u32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 4 x i16>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16> + // CHECK: @llvm.masked.store.nxv4i16.p0nxv4i16(<vscale x 4 x i16> %[[DATA]], <vscale x 4 x i16>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1h,_u32,,)(pg, base, data); +} + +void test_svst1h_u64(svbool_t pg, uint16_t *base, svuint64_t data) +{ + // CHECK-LABEL: test_svst1h_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 2 x i16>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16> + // CHECK: @llvm.masked.store.nxv2i16.p0nxv2i16(<vscale x 2 x i16> %[[DATA]], <vscale x 2 x i16>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1h,_u64,,)(pg, base, data); +} + +void test_svst1h_vnum_s32(svbool_t pg, int16_t *base, int64_t vnum, svint32_t data) +{ + // CHECK-LABEL: test_svst1h_vnum_s32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 4 x i16>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16> + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BASE]], i64 %vnum + // CHECK: @llvm.masked.store.nxv4i16.p0nxv4i16(<vscale x 4 x i16> %[[DATA]], <vscale x 4 x i16>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1h_vnum,_s32,,)(pg, base, vnum, data); +} + +void test_svst1h_vnum_s64(svbool_t pg, int16_t *base, int64_t vnum, svint64_t data) +{ + // CHECK-LABEL: test_svst1h_vnum_s64 + // 
CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 2 x i16>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16> + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BASE]], i64 %vnum + // CHECK: @llvm.masked.store.nxv2i16.p0nxv2i16(<vscale x 2 x i16> %[[DATA]], <vscale x 2 x i16>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1h_vnum,_s64,,)(pg, base, vnum, data); +} + +void test_svst1h_vnum_u32(svbool_t pg, uint16_t *base, int64_t vnum, svuint32_t data) +{ + // CHECK-LABEL: test_svst1h_vnum_u32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 4 x i16>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16> + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BASE]], i64 %vnum + // CHECK: @llvm.masked.store.nxv4i16.p0nxv4i16(<vscale x 4 x i16> %[[DATA]], <vscale x 4 x i16>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1h_vnum,_u32,,)(pg, base, vnum, data); +} + +void test_svst1h_vnum_u64(svbool_t pg, uint16_t *base, int64_t vnum, svuint64_t data) +{ + // CHECK-LABEL: test_svst1h_vnum_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 2 x i16>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16> + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BASE]], i64 %vnum + // CHECK: @llvm.masked.store.nxv2i16.p0nxv2i16(<vscale x 2 x i16> %[[DATA]], <vscale x 2 x i16>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]]) + return 
SVE_ACLE_FUNC(svst1h_vnum,_u64,,)(pg, base, vnum, data); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1w.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1w.c @@ -0,0 +1,53 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o - -emit-llvm %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o - -emit-llvm %s | FileCheck %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +void test_svst1w_s64(svbool_t pg, int32_t *base, svint64_t data) +{ + // CHECK-LABEL: test_svst1w_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 2 x i32>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32> + // CHECK: @llvm.masked.store.nxv2i32.p0nxv2i32(<vscale x 2 x i32> %[[DATA]], <vscale x 2 x i32>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1w,_s64,,)(pg, base, data); +} + +void test_svst1w_u64(svbool_t pg, uint32_t *base, svuint64_t data) +{ + // CHECK-LABEL: test_svst1w_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 2 x i32>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32> + // CHECK: @llvm.masked.store.nxv2i32.p0nxv2i32(<vscale x 2 x i32> %[[DATA]], <vscale x 2 x i32>* %[[BASE]], i32 1,
<vscale x 2 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1w,_u64,,)(pg, base, data); +} + +void test_svst1w_vnum_s64(svbool_t pg, int32_t *base, int64_t vnum, svint64_t data) +{ + // CHECK-LABEL: test_svst1w_vnum_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 2 x i32>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32> + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BASE]], i64 %vnum + // CHECK: @llvm.masked.store.nxv2i32.p0nxv2i32(<vscale x 2 x i32> %[[DATA]], <vscale x 2 x i32>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1w_vnum,_s64,,)(pg, base, vnum, data); +} + +void test_svst1w_vnum_u64(svbool_t pg, uint32_t *base, int64_t vnum, svuint64_t data) +{ + // CHECK-LABEL: test_svst1w_vnum_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 2 x i32>* + // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32> + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BASE]], i64 %vnum + // CHECK: @llvm.masked.store.nxv2i32.p0nxv2i32(<vscale x 2 x i32> %[[DATA]], <vscale x 2 x i32>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]]) + return SVE_ACLE_FUNC(svst1w_vnum,_u64,,)(pg, base, vnum, data); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1.c @@ -0,0 +1,227 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS
-triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +void test_svstnt1_s8(svbool_t pg, int8_t *base, svint8_t data) +{ + // CHECK-LABEL: test_svstnt1_s8 + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg, i8* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1,_s8,,)(pg, base, data); +} + +void test_svstnt1_s16(svbool_t pg, int16_t *base, svint16_t data) +{ + // CHECK-LABEL: test_svstnt1_s16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %[[PG]], i16* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1,_s16,,)(pg, base, data); +} + +void test_svstnt1_s32(svbool_t pg, int32_t *base, svint32_t data) +{ + // CHECK-LABEL: test_svstnt1_s32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %[[PG]], i32* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1,_s32,,)(pg, base, data); +} + +void test_svstnt1_s64(svbool_t pg, int64_t *base, svint64_t data) +{ + // CHECK-LABEL: test_svstnt1_s64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %[[PG]], i64* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1,_s64,,)(pg, base, data); +} + +void 
test_svstnt1_u8(svbool_t pg, uint8_t *base, svuint8_t data) +{ + // CHECK-LABEL: test_svstnt1_u8 + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg, i8* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1,_u8,,)(pg, base, data); +} + +void test_svstnt1_u16(svbool_t pg, uint16_t *base, svuint16_t data) +{ + // CHECK-LABEL: test_svstnt1_u16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %[[PG]], i16* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1,_u16,,)(pg, base, data); +} + +void test_svstnt1_u32(svbool_t pg, uint32_t *base, svuint32_t data) +{ + // CHECK-LABEL: test_svstnt1_u32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %[[PG]], i32* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1,_u32,,)(pg, base, data); +} + +void test_svstnt1_u64(svbool_t pg, uint64_t *base, svuint64_t data) +{ + // CHECK-LABEL: test_svstnt1_u64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %[[PG]], i64* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1,_u64,,)(pg, base, data); +} + +void test_svstnt1_f16(svbool_t pg, float16_t *base, svfloat16_t data) +{ + // CHECK-LABEL: test_svstnt1_f16 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %[[PG]], half* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1,_f16,,)(pg, 
base, data); +} + +void test_svstnt1_f32(svbool_t pg, float32_t *base, svfloat32_t data) +{ + // CHECK-LABEL: test_svstnt1_f32 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %[[PG]], float* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1,_f32,,)(pg, base, data); +} + +void test_svstnt1_f64(svbool_t pg, float64_t *base, svfloat64_t data) +{ + // CHECK-LABEL: test_svstnt1_f64 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %[[PG]], double* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1,_f64,,)(pg, base, data); +} + +void test_svstnt1_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data) +{ + // CHECK-LABEL: test_svstnt1_vnum_s8 + // CHECK: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>* + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg, i8* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1_vnum,_s8,,)(pg, base, vnum, data); +} + +void test_svstnt1_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16_t data) +{ + // CHECK-LABEL: test_svstnt1_vnum_s16 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK-NEXT: ret + return 
SVE_ACLE_FUNC(svstnt1_vnum,_s16,,)(pg, base, vnum, data); +} + +void test_svstnt1_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32_t data) +{ + // CHECK-LABEL: test_svstnt1_vnum_s32 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %[[PG]], i32* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1_vnum,_s32,,)(pg, base, vnum, data); +} + +void test_svstnt1_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64_t data) +{ + // CHECK-LABEL: test_svstnt1_vnum_s64 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %[[PG]], i64* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1_vnum,_s64,,)(pg, base, vnum, data); +} + +void test_svstnt1_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t data) +{ + // CHECK-LABEL: test_svstnt1_vnum_u8 + // CHECK: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>* + // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg, i8* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1_vnum,_u8,,)(pg, base, vnum, data); +} + +void test_svstnt1_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t data) +{ + // 
CHECK-LABEL: test_svstnt1_vnum_u16 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %[[PG]], i16* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1_vnum,_u16,,)(pg, base, vnum, data); +} + +void test_svstnt1_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t data) +{ + // CHECK-LABEL: test_svstnt1_vnum_u32 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %[[PG]], i32* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1_vnum,_u32,,)(pg, base, vnum, data); +} + +void test_svstnt1_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t data) +{ + // CHECK-LABEL: test_svstnt1_vnum_u64 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %[[PG]], i64* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1_vnum,_u64,,)(pg, base, vnum, data); +} + +void test_svstnt1_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16_t data) +{ + // CHECK-LABEL: 
test_svstnt1_vnum_f16 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast half* %base to <vscale x 8 x half>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %[[PG]], half* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1_vnum,_f16,,)(pg, base, vnum, data); +} + +void test_svstnt1_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32_t data) +{ + // CHECK-LABEL: test_svstnt1_vnum_f32 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast float* %base to <vscale x 4 x float>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %[[PG]], float* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1_vnum,_f32,,)(pg, base, vnum, data); +} + +void test_svstnt1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64_t data) +{ + // CHECK-LABEL: test_svstnt1_vnum_f64 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast double* %base to <vscale x 2 x double>* + // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %[[PG]], double* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svstnt1_vnum,_f64,,)(pg, base, vnum, data); +} Index: clang/utils/TableGen/SveEmitter.cpp 
=================================================================== --- clang/utils/TableGen/SveEmitter.cpp +++ clang/utils/TableGen/SveEmitter.cpp @@ -442,6 +442,97 @@ Bitwidth = 16; ElementBitwidth = 1; break; + case 'l': + Predicate = false; + Signed = true; + Float = false; + ElementBitwidth = Bitwidth = 64; + NumVectors = 0; + break; + case 'S': + Constant = true; + Pointer = true; + ElementBitwidth = Bitwidth = 8; + NumVectors = 0; + Signed = true; + break; + case 'W': + Constant = true; + Pointer = true; + ElementBitwidth = Bitwidth = 8; + NumVectors = 0; + Signed = false; + break; + case 'T': + Constant = true; + Pointer = true; + ElementBitwidth = Bitwidth = 16; + NumVectors = 0; + Signed = true; + break; + case 'X': + Constant = true; + Pointer = true; + ElementBitwidth = Bitwidth = 16; + NumVectors = 0; + Signed = false; + break; + case 'Y': + Constant = true; + Pointer = true; + ElementBitwidth = Bitwidth = 32; + NumVectors = 0; + Signed = false; + break; + case 'U': + Constant = true; + Pointer = true; + ElementBitwidth = Bitwidth = 32; + NumVectors = 0; + Signed = true; + break; + case 'A': + Pointer = true; + ElementBitwidth = Bitwidth = 8; + NumVectors = 0; + Signed = true; + break; + case 'B': + Pointer = true; + ElementBitwidth = Bitwidth = 16; + NumVectors = 0; + Signed = true; + break; + case 'C': + Pointer = true; + ElementBitwidth = Bitwidth = 32; + NumVectors = 0; + Signed = true; + break; + case 'D': + Pointer = true; + ElementBitwidth = Bitwidth = 64; + NumVectors = 0; + Signed = true; + break; + case 'E': + Pointer = true; + ElementBitwidth = Bitwidth = 8; + NumVectors = 0; + Signed = false; + break; + case 'F': + Pointer = true; + ElementBitwidth = Bitwidth = 16; + NumVectors = 0; + Signed = false; + break; + case 'G': + Pointer = true; + ElementBitwidth = Bitwidth = 32; + NumVectors = 0; + Signed = false; + break; default: llvm_unreachable("Unhandled character!"); }