diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -480,6 +480,16 @@
 // Load one quadword and replicate (scalar base)
 def SVLD1RQ : SInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1rq">;
 
+// Load N-element structure into N vectors (scalar base)
+def SVLD2 : SInst<"svld2[_{2}]", "2Pc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld2", [IsStructLoad]>;
+def SVLD3 : SInst<"svld3[_{2}]", "3Pc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld3", [IsStructLoad]>;
+def SVLD4 : SInst<"svld4[_{2}]", "4Pc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld4", [IsStructLoad]>;
+
+// Load N-element structure into N vectors (scalar base, VL displacement)
+def SVLD2_VNUM : SInst<"svld2_vnum[_{2}]", "2Pcl", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld2", [IsStructLoad]>;
+def SVLD3_VNUM : SInst<"svld3_vnum[_{2}]", "3Pcl", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld3", [IsStructLoad]>;
+def SVLD4_VNUM : SInst<"svld4_vnum[_{2}]", "4Pcl", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld4", [IsStructLoad]>;
+
 // Load one octoword and replicate (scalar base)
 let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_FP64)" in {
 def SVLD1RO : SInst<"svld1ro[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1ro">;
@@ -577,6 +587,16 @@
 def SVST1H_SCATTER_INDEX_S : MInst<"svst1h_scatter[_{2}base]_index[_{d}]", "vPuld", "ilUiUl", [IsScatterStore], MemEltTyInt16, "aarch64_sve_st1_scatter_scalar_offset">;
 def SVST1W_SCATTER_INDEX_S : MInst<"svst1w_scatter[_{2}base]_index[_{d}]", "vPuld", "lUl", [IsScatterStore], MemEltTyInt32, "aarch64_sve_st1_scatter_scalar_offset">;
 
+// Store N vectors into N-element structure (scalar base)
+def SVST2 : SInst<"svst2[_{d}]", "vPp2", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_st2", [IsStructStore]>;
+def SVST3 : SInst<"svst3[_{d}]", "vPp3", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_st3", [IsStructStore]>;
+def SVST4 : SInst<"svst4[_{d}]", "vPp4", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_st4", [IsStructStore]>;
+
+// Store N vectors into N-element structure (scalar base, VL displacement)
+def SVST2_VNUM : SInst<"svst2_vnum[_{d}]", "vPpl2", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_st2", [IsStructStore]>;
+def SVST3_VNUM : SInst<"svst3_vnum[_{d}]", "vPpl3", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_st3", [IsStructStore]>;
+def SVST4_VNUM : SInst<"svst4_vnum[_{d}]", "vPpl4", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_st4", [IsStructStore]>;
+
 // Store one vector, with no truncation, non-temporal (scalar base)
 def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">;
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7904,6 +7904,102 @@
   return Builder.CreateCall(F, Ops);
 }
 
+/// Emit a call to one of the SVE structured-load intrinsics (ld2/ld3/ld4).
+///
+/// Ops[0] is the governing predicate and Ops[1] the base pointer; when a
+/// third operand is present it is used as a displacement counted in whole
+/// vectors of the part type. The call returns one wide scalable vector that
+/// has N times as many elements as a single part vector.
+Value *CodeGenFunction::EmitSVEStructLoad(SVETypeFlags TypeFlags,
+                                          SmallVectorImpl<Value *> &Ops,
+                                          unsigned IntID) {
+  llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
+  auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
+  auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());
+
+  // Number of part vectors transferred, derived from the intrinsic ID.
+  unsigned N;
+  switch (IntID) {
+  case Intrinsic::aarch64_sve_ld2:
+    N = 2;
+    break;
+  case Intrinsic::aarch64_sve_ld3:
+    N = 3;
+    break;
+  case Intrinsic::aarch64_sve_ld4:
+    N = 4;
+    break;
+  default:
+    llvm_unreachable("unknown intrinsic!");
+  }
+  auto RetTy = llvm::VectorType::get(VTy->getElementType(),
+                                     VTy->getElementCount() * N);
+
+  Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
+  // Step the base pointer in units of whole vectors, then hand the intrinsic
+  // an element pointer. Without a displacement operand the offset is zero.
+  Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);
+  Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0);
+  BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset);
+  BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
+
+  Function *F = CGM.getIntrinsic(IntID, {RetTy, Predicate->getType()});
+  return Builder.CreateCall(F, {Predicate, BasePtr});
+}
+
+/// Emit a call to one of the SVE structured-store intrinsics (st2/st3/st4).
+///
+/// Ops[0] is the governing predicate, Ops[1] the base pointer and the last
+/// operand the tuple value to store; when more than three operands are
+/// present, Ops[2] is used as a displacement counted in whole vectors.
+Value *CodeGenFunction::EmitSVEStructStore(SVETypeFlags TypeFlags,
+                                           SmallVectorImpl<Value *> &Ops,
+                                           unsigned IntID) {
+  llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
+  auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
+  auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());
+
+  // Number of part vectors transferred, derived from the intrinsic ID.
+  unsigned N;
+  switch (IntID) {
+  case Intrinsic::aarch64_sve_st2:
+    N = 2;
+    break;
+  case Intrinsic::aarch64_sve_st3:
+    N = 3;
+    break;
+  case Intrinsic::aarch64_sve_st4:
+    N = 4;
+    break;
+  default:
+    llvm_unreachable("unknown intrinsic!");
+  }
+  auto TupleTy =
+      llvm::VectorType::get(VTy->getElementType(), VTy->getElementCount() * N);
+
+  Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
+  // Step the base pointer in units of whole vectors, then hand the intrinsic
+  // an element pointer. Without a displacement operand the offset is zero.
+  Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);
+  Value *Offset = Ops.size() > 3 ? Ops[2] : Builder.getInt32(0);
+  Value *Val = Ops.back();
+  BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset);
+  BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
+
+  // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
+  // need to break up the tuple vector.
+  // Inline capacity covers the largest case: four parts, predicate, pointer.
+  SmallVector<llvm::Value *, 6> Operands;
+  Function *FExtr =
+      CGM.getIntrinsic(Intrinsic::aarch64_sve_tuple_get, {VTy, TupleTy});
+  for (unsigned I = 0; I < N; ++I)
+    Operands.push_back(Builder.CreateCall(FExtr, {Val, Builder.getInt32(I)}));
+  Operands.append({Predicate, BasePtr});
+
+  Function *F = CGM.getIntrinsic(IntID, {VTy});
+  return Builder.CreateCall(F, Operands);
+}
+
 // SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
 // svpmullt_pair intrinsics, with the exception that their results are bitcast
 // to a wider type.
@@ -8114,6 +8192,10 @@ return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isGatherPrefetch()) return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isStructLoad()) + return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isStructStore()) + return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isUndef()) return UndefValue::get(Ty); else if (Builtin->LLVMIntrinsic != 0) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3990,6 +3990,11 @@ llvm::Value *EmitSVEGatherPrefetch(SVETypeFlags TypeFlags, SmallVectorImpl &Ops, unsigned IntID); + llvm::Value *EmitSVEStructLoad(SVETypeFlags TypeFlags, + SmallVectorImpl &Ops, unsigned IntID); + llvm::Value *EmitSVEStructStore(SVETypeFlags TypeFlags, + SmallVectorImpl &Ops, + unsigned IntID); llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c @@ -0,0 +1,227 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8x2_t test_svld2_s8(svbool_t pg, const int8_t *base) +{ + // CHECK-LABEL: test_svld2_s8 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( %pg, i8* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2,_s8,,)(pg, base); +} + +svint16x2_t test_svld2_s16(svbool_t pg, const int16_t *base) +{ + // CHECK-LABEL: test_svld2_s16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( %[[PG]], i16* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2,_s16,,)(pg, base); +} + +svint32x2_t test_svld2_s32(svbool_t pg, const int32_t *base) +{ + // CHECK-LABEL: test_svld2_s32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( %[[PG]], i32* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2,_s32,,)(pg, base); +} + +svint64x2_t test_svld2_s64(svbool_t pg, const int64_t *base) +{ + // CHECK-LABEL: test_svld2_s64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( %[[PG]], i64* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2,_s64,,)(pg, base); +} + +svuint8x2_t test_svld2_u8(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svld2_u8 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( %pg, i8* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2,_u8,,)(pg, base); +} + +svuint16x2_t test_svld2_u16(svbool_t pg, const uint16_t *base) +{ + // CHECK-LABEL: test_svld2_u16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( %[[PG]], i16* %base) + // 
CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2,_u16,,)(pg, base); +} + +svuint32x2_t test_svld2_u32(svbool_t pg, const uint32_t *base) +{ + // CHECK-LABEL: test_svld2_u32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( %[[PG]], i32* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2,_u32,,)(pg, base); +} + +svuint64x2_t test_svld2_u64(svbool_t pg, const uint64_t *base) +{ + // CHECK-LABEL: test_svld2_u64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( %[[PG]], i64* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2,_u64,,)(pg, base); +} + +svfloat16x2_t test_svld2_f16(svbool_t pg, const float16_t *base) +{ + // CHECK-LABEL: test_svld2_f16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1( %[[PG]], half* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2,_f16,,)(pg, base); +} + +svfloat32x2_t test_svld2_f32(svbool_t pg, const float32_t *base) +{ + // CHECK-LABEL: test_svld2_f32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1( %[[PG]], float* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2,_f32,,)(pg, base); +} + +svfloat64x2_t test_svld2_f64(svbool_t pg, const float64_t *base) +{ + // CHECK-LABEL: test_svld2_f64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1( %[[PG]], double* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2,_f64,,)(pg, base); +} + +svint8x2_t test_svld2_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld2_vnum_s8 + // 
CHECK: %[[BASE:.*]] = bitcast i8* %base to * + // CHECK: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( %pg, i8* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2_vnum,_s8,,)(pg, base, vnum); +} + +svint16x2_t test_svld2_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld2_vnum_s16 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( %[[PG]], i16* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2_vnum,_s16,,)(pg, base, vnum); +} + +svint32x2_t test_svld2_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld2_vnum_s32 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( %[[PG]], i32* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2_vnum,_s32,,)(pg, base, vnum); +} + +svint64x2_t test_svld2_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld2_vnum_s64 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( %[[PG]], i64* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2_vnum,_s64,,)(pg, base, vnum); +} + +svuint8x2_t test_svld2_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: 
test_svld2_vnum_u8 + // CHECK: %[[BASE:.*]] = bitcast i8* %base to * + // CHECK: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( %pg, i8* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2_vnum,_u8,,)(pg, base, vnum); +} + +svuint16x2_t test_svld2_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld2_vnum_u16 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( %[[PG]], i16* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2_vnum,_u16,,)(pg, base, vnum); +} + +svuint32x2_t test_svld2_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld2_vnum_u32 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( %[[PG]], i32* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2_vnum,_u32,,)(pg, base, vnum); +} + +svuint64x2_t test_svld2_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld2_vnum_u64 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( %[[PG]], i64* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2_vnum,_u64,,)(pg, base, vnum); +} + +svfloat16x2_t test_svld2_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) 
+{ + // CHECK-LABEL: test_svld2_vnum_f16 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast half* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1( %[[PG]], half* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2_vnum,_f16,,)(pg, base, vnum); +} + +svfloat32x2_t test_svld2_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld2_vnum_f32 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast float* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1( %[[PG]], float* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2_vnum,_f32,,)(pg, base, vnum); +} + +svfloat64x2_t test_svld2_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld2_vnum_f64 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast double* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1( %[[PG]], double* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld2_vnum,_f64,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c @@ -0,0 +1,227 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE 
-DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8x3_t test_svld3_s8(svbool_t pg, const int8_t *base) +{ + // CHECK-LABEL: test_svld3_s8 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( %pg, i8* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3,_s8,,)(pg, base); +} + +svint16x3_t test_svld3_s16(svbool_t pg, const int16_t *base) +{ + // CHECK-LABEL: test_svld3_s16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( %[[PG]], i16* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3,_s16,,)(pg, base); +} + +svint32x3_t test_svld3_s32(svbool_t pg, const int32_t *base) +{ + // CHECK-LABEL: test_svld3_s32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( %[[PG]], i32* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3,_s32,,)(pg, base); +} + +svint64x3_t test_svld3_s64(svbool_t pg, const int64_t *base) +{ + // CHECK-LABEL: test_svld3_s64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( %[[PG]], i64* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3,_s64,,)(pg, base); +} + +svuint8x3_t test_svld3_u8(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svld3_u8 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( %pg, i8* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3,_u8,,)(pg, base); 
+} + +svuint16x3_t test_svld3_u16(svbool_t pg, const uint16_t *base) +{ + // CHECK-LABEL: test_svld3_u16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( %[[PG]], i16* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3,_u16,,)(pg, base); +} + +svuint32x3_t test_svld3_u32(svbool_t pg, const uint32_t *base) +{ + // CHECK-LABEL: test_svld3_u32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( %[[PG]], i32* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3,_u32,,)(pg, base); +} + +svuint64x3_t test_svld3_u64(svbool_t pg, const uint64_t *base) +{ + // CHECK-LABEL: test_svld3_u64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( %[[PG]], i64* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3,_u64,,)(pg, base); +} + +svfloat16x3_t test_svld3_f16(svbool_t pg, const float16_t *base) +{ + // CHECK-LABEL: test_svld3_f16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1( %[[PG]], half* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3,_f16,,)(pg, base); +} + +svfloat32x3_t test_svld3_f32(svbool_t pg, const float32_t *base) +{ + // CHECK-LABEL: test_svld3_f32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1( %[[PG]], float* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3,_f32,,)(pg, base); +} + +svfloat64x3_t test_svld3_f64(svbool_t pg, const float64_t *base) +{ + // CHECK-LABEL: test_svld3_f64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: 
%[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1( %[[PG]], double* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3,_f64,,)(pg, base); +} + +svint8x3_t test_svld3_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld3_vnum_s8 + // CHECK: %[[BASE:.*]] = bitcast i8* %base to * + // CHECK: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( %pg, i8* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3_vnum,_s8,,)(pg, base, vnum); +} + +svint16x3_t test_svld3_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld3_vnum_s16 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( %[[PG]], i16* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3_vnum,_s16,,)(pg, base, vnum); +} + +svint32x3_t test_svld3_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld3_vnum_s32 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( %[[PG]], i32* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3_vnum,_s32,,)(pg, base, vnum); +} + +svint64x3_t test_svld3_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld3_vnum_s64 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: 
%[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( %[[PG]], i64* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3_vnum,_s64,,)(pg, base, vnum); +} + +svuint8x3_t test_svld3_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld3_vnum_u8 + // CHECK: %[[BASE:.*]] = bitcast i8* %base to * + // CHECK: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( %pg, i8* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3_vnum,_u8,,)(pg, base, vnum); +} + +svuint16x3_t test_svld3_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld3_vnum_u16 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( %[[PG]], i16* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3_vnum,_u16,,)(pg, base, vnum); +} + +svuint32x3_t test_svld3_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld3_vnum_u32 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( %[[PG]], i32* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3_vnum,_u32,,)(pg, base, vnum); +} + +svuint64x3_t test_svld3_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld3_vnum_u64 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, 
i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( %[[PG]], i64* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3_vnum,_u64,,)(pg, base, vnum); +} + +svfloat16x3_t test_svld3_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld3_vnum_f16 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast half* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1( %[[PG]], half* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3_vnum,_f16,,)(pg, base, vnum); +} + +svfloat32x3_t test_svld3_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld3_vnum_f32 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast float* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1( %[[PG]], float* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3_vnum,_f32,,)(pg, base, vnum); +} + +svfloat64x3_t test_svld3_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld3_vnum_f64 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast double* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1( %[[PG]], double* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld3_vnum,_f64,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c new file mode 100644 --- /dev/null +++ 
b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c @@ -0,0 +1,227 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8x4_t test_svld4_s8(svbool_t pg, const int8_t *base) +{ + // CHECK-LABEL: test_svld4_s8 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( %pg, i8* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4,_s8,,)(pg, base); +} + +svint16x4_t test_svld4_s16(svbool_t pg, const int16_t *base) +{ + // CHECK-LABEL: test_svld4_s16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( %[[PG]], i16* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4,_s16,,)(pg, base); +} + +svint32x4_t test_svld4_s32(svbool_t pg, const int32_t *base) +{ + // CHECK-LABEL: test_svld4_s32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( %[[PG]], i32* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4,_s32,,)(pg, base); +} + +svint64x4_t test_svld4_s64(svbool_t pg, const int64_t *base) +{ + // CHECK-LABEL: test_svld4_s64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( %[[PG]], i64* %base) + // CHECK-NEXT: ret %[[LOAD]] + return 
SVE_ACLE_FUNC(svld4,_s64,,)(pg, base); +} + +svuint8x4_t test_svld4_u8(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svld4_u8 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( %pg, i8* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4,_u8,,)(pg, base); +} + +svuint16x4_t test_svld4_u16(svbool_t pg, const uint16_t *base) +{ + // CHECK-LABEL: test_svld4_u16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( %[[PG]], i16* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4,_u16,,)(pg, base); +} + +svuint32x4_t test_svld4_u32(svbool_t pg, const uint32_t *base) +{ + // CHECK-LABEL: test_svld4_u32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( %[[PG]], i32* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4,_u32,,)(pg, base); +} + +svuint64x4_t test_svld4_u64(svbool_t pg, const uint64_t *base) +{ + // CHECK-LABEL: test_svld4_u64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( %[[PG]], i64* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4,_u64,,)(pg, base); +} + +svfloat16x4_t test_svld4_f16(svbool_t pg, const float16_t *base) +{ + // CHECK-LABEL: test_svld4_f16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1( %[[PG]], half* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4,_f16,,)(pg, base); +} + +svfloat32x4_t test_svld4_f32(svbool_t pg, const float32_t *base) +{ + // CHECK-LABEL: test_svld4_f32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1( 
%[[PG]], float* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4,_f32,,)(pg, base); +} + +svfloat64x4_t test_svld4_f64(svbool_t pg, const float64_t *base) +{ + // CHECK-LABEL: test_svld4_f64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1( %[[PG]], double* %base) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4,_f64,,)(pg, base); +} + +svint8x4_t test_svld4_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld4_vnum_s8 + // CHECK: %[[BASE:.*]] = bitcast i8* %base to * + // CHECK: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( %pg, i8* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4_vnum,_s8,,)(pg, base, vnum); +} + +svint16x4_t test_svld4_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld4_vnum_s16 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( %[[PG]], i16* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4_vnum,_s16,,)(pg, base, vnum); +} + +svint32x4_t test_svld4_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld4_vnum_s32 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( %[[PG]], i32* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4_vnum,_s32,,)(pg, base, vnum); +} + +svint64x4_t test_svld4_vnum_s64(svbool_t pg, const 
int64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld4_vnum_s64 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( %[[PG]], i64* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4_vnum,_s64,,)(pg, base, vnum); +} + +svuint8x4_t test_svld4_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld4_vnum_u8 + // CHECK: %[[BASE:.*]] = bitcast i8* %base to * + // CHECK: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( %pg, i8* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4_vnum,_u8,,)(pg, base, vnum); +} + +svuint16x4_t test_svld4_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld4_vnum_u16 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( %[[PG]], i16* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4_vnum,_u16,,)(pg, base, vnum); +} + +svuint32x4_t test_svld4_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld4_vnum_u32 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( %[[PG]], i32* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4_vnum,_u32,,)(pg, base, vnum); +} + +svuint64x4_t 
test_svld4_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld4_vnum_u64 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( %[[PG]], i64* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4_vnum,_u64,,)(pg, base, vnum); +} + +svfloat16x4_t test_svld4_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld4_vnum_f16 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast half* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1( %[[PG]], half* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4_vnum,_f16,,)(pg, base, vnum); +} + +svfloat32x4_t test_svld4_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld4_vnum_f32 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast float* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1( %[[PG]], float* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4_vnum,_f32,,)(pg, base, vnum); +} + +svfloat64x4_t test_svld4_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) +{ + // CHECK-LABEL: test_svld4_vnum_f64 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[BASE:.*]] = bitcast double* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BASE]], i64 %vnum, i64 0 + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1( 
%[[PG]], double* %[[GEP]]) + // CHECK-NEXT: ret %[[LOAD]] + return SVE_ACLE_FUNC(svld4_vnum,_f64,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c @@ -0,0 +1,271 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +void test_svst2_s8(svbool_t pg, int8_t *base, svint8x2_t data) +{ + // CHECK-LABEL: test_svst2_s8 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8( %data, i32 1) + // CHECK: call void @llvm.aarch64.sve.st2.nxv16i8( %[[V0]], %[[V1]], %pg, i8* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2,_s8,,)(pg, base, data); +} + +void test_svst2_s16(svbool_t pg, int16_t *base, svint16x2_t data) +{ + // CHECK-LABEL: test_svst2_s16 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16( %data, i32 1) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st2.nxv8i16( %[[V0]], %[[V1]], %[[PG]], i16* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2,_s16,,)(pg, base, data); +} + 
+void test_svst2_s32(svbool_t pg, int32_t *base, svint32x2_t data) +{ + // CHECK-LABEL: test_svst2_s32 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32( %data, i32 1) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st2.nxv4i32( %[[V0]], %[[V1]], %[[PG]], i32* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2,_s32,,)(pg, base, data); +} + +void test_svst2_s64(svbool_t pg, int64_t *base, svint64x2_t data) +{ + // CHECK-LABEL: test_svst2_s64 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64( %data, i32 1) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st2.nxv2i64( %[[V0]], %[[V1]], %[[PG]], i64* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2,_s64,,)(pg, base, data); +} + +void test_svst2_u8(svbool_t pg, uint8_t *base, svuint8x2_t data) +{ + // CHECK-LABEL: test_svst2_u8 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8( %data, i32 1) + // CHECK: call void @llvm.aarch64.sve.st2.nxv16i8( %[[V0]], %[[V1]], %pg, i8* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2,_u8,,)(pg, base, data); +} + +void test_svst2_u16(svbool_t pg, uint16_t *base, svuint16x2_t data) +{ + // CHECK-LABEL: test_svst2_u16 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16( %data, i32 1) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st2.nxv8i16( %[[V0]], %[[V1]], 
%[[PG]], i16* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2,_u16,,)(pg, base, data); +} + +void test_svst2_u32(svbool_t pg, uint32_t *base, svuint32x2_t data) +{ + // CHECK-LABEL: test_svst2_u32 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32( %data, i32 1) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st2.nxv4i32( %[[V0]], %[[V1]], %[[PG]], i32* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2,_u32,,)(pg, base, data); +} + +void test_svst2_u64(svbool_t pg, uint64_t *base, svuint64x2_t data) +{ + // CHECK-LABEL: test_svst2_u64 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64( %data, i32 1) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st2.nxv2i64( %[[V0]], %[[V1]], %[[PG]], i64* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2,_u64,,)(pg, base, data); +} + +void test_svst2_f16(svbool_t pg, float16_t *base, svfloat16x2_t data) +{ + // CHECK-LABEL: test_svst2_f16 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16( %data, i32 1) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st2.nxv8f16( %[[V0]], %[[V1]], %[[PG]], half* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2,_f16,,)(pg, base, data); +} + +void test_svst2_f32(svbool_t pg, float32_t *base, svfloat32x2_t data) +{ + // CHECK-LABEL: test_svst2_f32 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call 
@llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32( %data, i32 1) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st2.nxv4f32( %[[V0]], %[[V1]], %[[PG]], float* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2,_f32,,)(pg, base, data); +} + +void test_svst2_f64(svbool_t pg, float64_t *base, svfloat64x2_t data) +{ + // CHECK-LABEL: test_svst2_f64 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64( %data, i32 1) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st2.nxv2f64( %[[V0]], %[[V1]], %[[PG]], double* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2,_f64,,)(pg, base, data); +} + +void test_svst2_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x2_t data) +{ + // CHECK-LABEL: test_svst2_vnum_s8 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8( %data, i32 1) + // CHECK: call void @llvm.aarch64.sve.st2.nxv16i8( %[[V0]], %[[V1]], %pg, i8* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2_vnum,_s8,,)(pg, base, vnum, data); +} + +void test_svst2_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x2_t data) +{ + // CHECK-LABEL: test_svst2_vnum_s16 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16( %data, i32 1) + // CHECK-DAG: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st2.nxv8i16( %[[V0]], %[[V1]], %[[PG]], i16* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2_vnum,_s16,,)(pg, base, vnum, data); +} + +void test_svst2_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x2_t data) +{ + // CHECK-LABEL: test_svst2_vnum_s32 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32( %data, i32 1) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st2.nxv4i32( %[[V0]], %[[V1]], %[[PG]], i32* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2_vnum,_s32,,)(pg, base, vnum, data); +} + +void test_svst2_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x2_t data) +{ + // CHECK-LABEL: test_svst2_vnum_s64 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64( %data, i32 1) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st2.nxv2i64( %[[V0]], %[[V1]], %[[PG]], i64* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2_vnum,_s64,,)(pg, base, vnum, data); +} + +void test_svst2_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x2_t data) +{ + // CHECK-LABEL: test_svst2_vnum_u8 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call 
@llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8( %data, i32 1) + // CHECK: call void @llvm.aarch64.sve.st2.nxv16i8( %[[V0]], %[[V1]], %pg, i8* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2_vnum,_u8,,)(pg, base, vnum, data); +} + +void test_svst2_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x2_t data) +{ + // CHECK-LABEL: test_svst2_vnum_u16 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16( %data, i32 1) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st2.nxv8i16( %[[V0]], %[[V1]], %[[PG]], i16* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2_vnum,_u16,,)(pg, base, vnum, data); +} + +void test_svst2_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x2_t data) +{ + // CHECK-LABEL: test_svst2_vnum_u32 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32( %data, i32 1) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st2.nxv4i32( %[[V0]], %[[V1]], %[[PG]], i32* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2_vnum,_u32,,)(pg, base, vnum, data); +} + +void test_svst2_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x2_t data) +{ + // CHECK-LABEL: test_svst2_vnum_u64 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to * + // CHECK-DAG: %[[GEP:.*]] = 
getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64( %data, i32 1) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st2.nxv2i64( %[[V0]], %[[V1]], %[[PG]], i64* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2_vnum,_u64,,)(pg, base, vnum, data); +} + +void test_svst2_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x2_t data) +{ + // CHECK-LABEL: test_svst2_vnum_f16 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast half* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16( %data, i32 1) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st2.nxv8f16( %[[V0]], %[[V1]], %[[PG]], half* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2_vnum,_f16,,)(pg, base, vnum, data); +} + +void test_svst2_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x2_t data) +{ + // CHECK-LABEL: test_svst2_vnum_f32 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast float* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32( %data, i32 1) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st2.nxv4f32( %[[V0]], %[[V1]], %[[PG]], float* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2_vnum,_f32,,)(pg, base, vnum, data); +} + +void test_svst2_vnum_f64(svbool_t 
pg, float64_t *base, int64_t vnum, svfloat64x2_t data) +{ + // CHECK-LABEL: test_svst2_vnum_f64 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast double* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64( %data, i32 1) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st2.nxv2f64( %[[V0]], %[[V1]], %[[PG]], double* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst2_vnum,_f64,,)(pg, base, vnum, data); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c @@ -0,0 +1,293 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +void test_svst3_s8(svbool_t pg, int8_t *base, svint8x3_t data) +{ + // CHECK-LABEL: test_svst3_s8 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %data, i32 2) + // CHECK: call void @llvm.aarch64.sve.st3.nxv16i8( %[[V0]], %[[V1]], %[[V2]], %pg, i8* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3,_s8,,)(pg, base, data); +} + +void test_svst3_s16(svbool_t pg, int16_t *base, svint16x3_t data) +{ + // CHECK-LABEL: test_svst3_s16 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %data, i32 2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st3.nxv8i16( %[[V0]], %[[V1]], %[[V2]], %[[PG]], i16* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3,_s16,,)(pg, base, data); +} + +void test_svst3_s32(svbool_t pg, int32_t *base, svint32x3_t data) +{ + // CHECK-LABEL: test_svst3_s32 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( %data, i32 2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st3.nxv4i32( %[[V0]], %[[V1]], %[[V2]], %[[PG]], i32* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3,_s32,,)(pg, base, data); +} + +void 
test_svst3_s64(svbool_t pg, int64_t *base, svint64x3_t data) +{ + // CHECK-LABEL: test_svst3_s64 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( %data, i32 2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st3.nxv2i64( %[[V0]], %[[V1]], %[[V2]], %[[PG]], i64* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3,_s64,,)(pg, base, data); +} + +void test_svst3_u8(svbool_t pg, uint8_t *base, svuint8x3_t data) +{ + // CHECK-LABEL: test_svst3_u8 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %data, i32 2) + // CHECK: call void @llvm.aarch64.sve.st3.nxv16i8( %[[V0]], %[[V1]], %[[V2]], %pg, i8* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3,_u8,,)(pg, base, data); +} + +void test_svst3_u16(svbool_t pg, uint16_t *base, svuint16x3_t data) +{ + // CHECK-LABEL: test_svst3_u16 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %data, i32 2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st3.nxv8i16( %[[V0]], %[[V1]], %[[V2]], %[[PG]], i16* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3,_u16,,)(pg, base, data); +} + +void test_svst3_u32(svbool_t pg, uint32_t *base, svuint32x3_t data) +{ + // CHECK-LABEL: test_svst3_u32 + // CHECK-DAG: %[[V0:.*]] = call 
@llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( %data, i32 2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st3.nxv4i32( %[[V0]], %[[V1]], %[[V2]], %[[PG]], i32* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3,_u32,,)(pg, base, data); +} + +void test_svst3_u64(svbool_t pg, uint64_t *base, svuint64x3_t data) +{ + // CHECK-LABEL: test_svst3_u64 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( %data, i32 2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st3.nxv2i64( %[[V0]], %[[V1]], %[[V2]], %[[PG]], i64* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3,_u64,,)(pg, base, data); +} + +void test_svst3_f16(svbool_t pg, float16_t *base, svfloat16x3_t data) +{ + // CHECK-LABEL: test_svst3_f16 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16( %data, i32 2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st3.nxv8f16( %[[V0]], %[[V1]], %[[V2]], %[[PG]], half* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3,_f16,,)(pg, base, data); +} + +void test_svst3_f32(svbool_t pg, float32_t *base, svfloat32x3_t data) +{ + // CHECK-LABEL: test_svst3_f32 + // CHECK-DAG: %[[V0:.*]] = call 
@llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32( %data, i32 2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st3.nxv4f32( %[[V0]], %[[V1]], %[[V2]], %[[PG]], float* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3,_f32,,)(pg, base, data); +} + +void test_svst3_f64(svbool_t pg, float64_t *base, svfloat64x3_t data) +{ + // CHECK-LABEL: test_svst3_f64 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64( %data, i32 2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st3.nxv2f64( %[[V0]], %[[V1]], %[[V2]], %[[PG]], double* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3,_f64,,)(pg, base, data); +} + +void test_svst3_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x3_t data) +{ + // CHECK-LABEL: test_svst3_vnum_s8 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %data, i32 2) + // CHECK: call void @llvm.aarch64.sve.st3.nxv16i8( %[[V0]], %[[V1]], %[[V2]], %pg, i8* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3_vnum,_s8,,)(pg, base, vnum, data); +} + +void test_svst3_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x3_t data) +{ + // 
CHECK-LABEL: test_svst3_vnum_s16 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %data, i32 2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st3.nxv8i16( %[[V0]], %[[V1]], %[[V2]], %[[PG]], i16* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3_vnum,_s16,,)(pg, base, vnum, data); +} + +void test_svst3_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x3_t data) +{ + // CHECK-LABEL: test_svst3_vnum_s32 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( %data, i32 2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st3.nxv4i32( %[[V0]], %[[V1]], %[[V2]], %[[PG]], i32* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3_vnum,_s32,,)(pg, base, vnum, data); +} + +void test_svst3_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x3_t data) +{ + // CHECK-LABEL: test_svst3_vnum_s64 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( 
%data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( %data, i32 2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st3.nxv2i64( %[[V0]], %[[V1]], %[[V2]], %[[PG]], i64* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3_vnum,_s64,,)(pg, base, vnum, data); +} + +void test_svst3_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x3_t data) +{ + // CHECK-LABEL: test_svst3_vnum_u8 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %data, i32 2) + // CHECK: call void @llvm.aarch64.sve.st3.nxv16i8( %[[V0]], %[[V1]], %[[V2]], %pg, i8* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3_vnum,_u8,,)(pg, base, vnum, data); +} + +void test_svst3_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x3_t data) +{ + // CHECK-LABEL: test_svst3_vnum_u16 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %data, i32 2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st3.nxv8i16( %[[V0]], %[[V1]], %[[V2]], %[[PG]], i16* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3_vnum,_u16,,)(pg, base, vnum, data); +} + +void test_svst3_vnum_u32(svbool_t pg, uint32_t *base, 
int64_t vnum, svuint32x3_t data) +{ + // CHECK-LABEL: test_svst3_vnum_u32 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( %data, i32 2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st3.nxv4i32( %[[V0]], %[[V1]], %[[V2]], %[[PG]], i32* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3_vnum,_u32,,)(pg, base, vnum, data); +} + +void test_svst3_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x3_t data) +{ + // CHECK-LABEL: test_svst3_vnum_u64 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( %data, i32 2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st3.nxv2i64( %[[V0]], %[[V1]], %[[V2]], %[[PG]], i64* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3_vnum,_u64,,)(pg, base, vnum, data); +} + +void test_svst3_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x3_t data) +{ + // CHECK-LABEL: test_svst3_vnum_f16 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast half* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call 
@llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16( %data, i32 2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st3.nxv8f16( %[[V0]], %[[V1]], %[[V2]], %[[PG]], half* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3_vnum,_f16,,)(pg, base, vnum, data); +} + +void test_svst3_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x3_t data) +{ + // CHECK-LABEL: test_svst3_vnum_f32 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast float* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32( %data, i32 2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st3.nxv4f32( %[[V0]], %[[V1]], %[[V2]], %[[PG]], float* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3_vnum,_f32,,)(pg, base, vnum, data); +} + +void test_svst3_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x3_t data) +{ + // CHECK-LABEL: test_svst3_vnum_f64 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast double* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64( %data, i32 2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st3.nxv2f64( %[[V0]], %[[V1]], %[[V2]], %[[PG]], 
double* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst3_vnum,_f64,,)(pg, base, vnum, data); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c @@ -0,0 +1,315 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +void test_svst4_s8(svbool_t pg, int8_t *base, svint8x4_t data) +{ + // CHECK-LABEL: test_svst4_s8 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %data, i32 3) + // CHECK: call void @llvm.aarch64.sve.st4.nxv16i8( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %pg, i8* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4,_s8,,)(pg, base, data); +} + +void test_svst4_s16(svbool_t pg, int16_t *base, svint16x4_t data) +{ + // CHECK-LABEL: test_svst4_s16 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call 
@llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %data, i32 3) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st4.nxv8i16( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %[[PG]], i16* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4,_s16,,)(pg, base, data); +} + +void test_svst4_s32(svbool_t pg, int32_t *base, svint32x4_t data) +{ + // CHECK-LABEL: test_svst4_s32 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %data, i32 3) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st4.nxv4i32( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %[[PG]], i32* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4,_s32,,)(pg, base, data); +} + +void test_svst4_s64(svbool_t pg, int64_t *base, svint64x4_t data) +{ + // CHECK-LABEL: test_svst4_s64 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %data, i32 3) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st4.nxv2i64( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %[[PG]], i64* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4,_s64,,)(pg, base, data); +} + +void test_svst4_u8(svbool_t pg, uint8_t *base, svuint8x4_t data) 
+{ + // CHECK-LABEL: test_svst4_u8 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %data, i32 3) + // CHECK: call void @llvm.aarch64.sve.st4.nxv16i8( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %pg, i8* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4,_u8,,)(pg, base, data); +} + +void test_svst4_u16(svbool_t pg, uint16_t *base, svuint16x4_t data) +{ + // CHECK-LABEL: test_svst4_u16 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %data, i32 3) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st4.nxv8i16( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %[[PG]], i16* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4,_u16,,)(pg, base, data); +} + +void test_svst4_u32(svbool_t pg, uint32_t *base, svuint32x4_t data) +{ + // CHECK-LABEL: test_svst4_u32 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %data, i32 3) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st4.nxv4i32( %[[V0]], %[[V1]], %[[V2]], 
%[[V3]], %[[PG]], i32* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4,_u32,,)(pg, base, data); +} + +void test_svst4_u64(svbool_t pg, uint64_t *base, svuint64x4_t data) +{ + // CHECK-LABEL: test_svst4_u64 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %data, i32 3) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st4.nxv2i64( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %[[PG]], i64* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4,_u64,,)(pg, base, data); +} + +void test_svst4_f16(svbool_t pg, float16_t *base, svfloat16x4_t data) +{ + // CHECK-LABEL: test_svst4_f16 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16( %data, i32 3) + // CHECK: call void @llvm.aarch64.sve.st4.nxv8f16( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %[[PG]], half* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4,_f16,,)(pg, base, data); +} + +void test_svst4_f32(svbool_t pg, float32_t *base, svfloat32x4_t data) +{ + // CHECK-LABEL: test_svst4_f32 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32( 
%data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32( %data, i32 3) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st4.nxv4f32( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %[[PG]], float* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4,_f32,,)(pg, base, data); +} + +void test_svst4_f64(svbool_t pg, float64_t *base, svfloat64x4_t data) +{ + // CHECK-LABEL: test_svst4_f64 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64( %data, i32 3) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st4.nxv2f64( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %[[PG]], double* %base) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4,_f64,,)(pg, base, data); +} + +void test_svst4_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x4_t data) +{ + // CHECK-LABEL: test_svst4_vnum_s8 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %data, i32 3) + // CHECK: call void @llvm.aarch64.sve.st4.nxv16i8( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %pg, i8* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4_vnum,_s8,,)(pg, base, vnum, data); +} + +void 
test_svst4_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x4_t data) +{ + // CHECK-LABEL: test_svst4_vnum_s16 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %data, i32 3) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st4.nxv8i16( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %[[PG]], i16* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4_vnum,_s16,,)(pg, base, vnum, data); +} + +void test_svst4_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x4_t data) +{ + // CHECK-LABEL: test_svst4_vnum_s32 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %data, i32 3) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st4.nxv4i32( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %[[PG]], i32* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4_vnum,_s32,,)(pg, base, vnum, data); +} + +void test_svst4_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x4_t data) +{ + // CHECK-LABEL: test_svst4_vnum_s64 + // CHECK-DAG: 
%[[BITCAST:.*]] = bitcast i64* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %data, i32 3) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st4.nxv2i64( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %[[PG]], i64* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4_vnum,_s64,,)(pg, base, vnum, data); +} + +void test_svst4_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x4_t data) +{ + // CHECK-LABEL: test_svst4_vnum_u8 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %data, i32 3) + // CHECK: call void @llvm.aarch64.sve.st4.nxv16i8( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %pg, i8* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4_vnum,_u8,,)(pg, base, vnum, data); +} + +void test_svst4_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x4_t data) +{ + // CHECK-LABEL: test_svst4_vnum_u16 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] 
= call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %data, i32 3) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st4.nxv8i16( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %[[PG]], i16* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4_vnum,_u16,,)(pg, base, vnum, data); +} + +void test_svst4_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x4_t data) +{ + // CHECK-LABEL: test_svst4_vnum_u32 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %data, i32 3) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st4.nxv4i32( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %[[PG]], i32* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4_vnum,_u32,,)(pg, base, vnum, data); +} + +void test_svst4_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x4_t data) +{ + // CHECK-LABEL: test_svst4_vnum_u64 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call 
@llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %data, i32 3) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st4.nxv2i64( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %[[PG]], i64* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4_vnum,_u64,,)(pg, base, vnum, data); +} + +void test_svst4_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x4_t data) +{ + // CHECK-LABEL: test_svst4_vnum_f16 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast half* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16( %data, i32 3) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st4.nxv8f16( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %[[PG]], half* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4_vnum,_f16,,)(pg, base, vnum, data); +} + +void test_svst4_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x4_t data) +{ + // CHECK-LABEL: test_svst4_vnum_f32 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast float* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call 
@llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32( %data, i32 3) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st4.nxv4f32( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %[[PG]], float* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4_vnum,_f32,,)(pg, base, vnum, data); +} + +void test_svst4_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x4_t data) +{ + // CHECK-LABEL: test_svst4_vnum_f64 + // CHECK-DAG: %[[BITCAST:.*]] = bitcast double* %base to * + // CHECK-DAG: %[[GEP:.*]] = getelementptr , * %[[BITCAST]], i64 %vnum, i64 0 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64( %data, i32 1) + // CHECK-DAG: %[[V2:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64( %data, i32 2) + // CHECK-DAG: %[[V3:.*]] = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64( %data, i32 3) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.st4.nxv2f64( %[[V0]], %[[V1]], %[[V2]], %[[V3]], %[[PG]], double* %[[GEP]]) + // CHECK-NEXT: ret + return SVE_ACLE_FUNC(svst4_vnum,_f64,,)(pg, base, vnum, data); +}