Index: clang/include/clang/Basic/BuiltinsAArch64NeonSVEBridge.def
===================================================================
--- /dev/null
+++ clang/include/clang/Basic/BuiltinsAArch64NeonSVEBridge.def
@@ -0,0 +1,39 @@
+#ifdef GET_SVE_BUILTINS
+BUILTIN(__builtin_sve_svget_neonq_s8, "V16Scq16Sc", "n")
+BUILTIN(__builtin_sve_svget_neonq_s16, "V8sq8s", "n")
+BUILTIN(__builtin_sve_svget_neonq_s32, "V4iq4i", "n")
+BUILTIN(__builtin_sve_svget_neonq_s64, "V2Wiq2Wi", "n")
+BUILTIN(__builtin_sve_svget_neonq_u8, "V16Ucq16Uc", "n")
+BUILTIN(__builtin_sve_svget_neonq_u16, "V8Usq8Us", "n")
+BUILTIN(__builtin_sve_svget_neonq_u32, "V4Uiq4Ui", "n")
+BUILTIN(__builtin_sve_svget_neonq_u64, "V2UWiq2UWi", "n")
+BUILTIN(__builtin_sve_svget_neonq_f16, "V8hq8h", "n")
+BUILTIN(__builtin_sve_svget_neonq_f32, "V4fq4f", "n")
+BUILTIN(__builtin_sve_svget_neonq_f64, "V2dq2d", "n")
+BUILTIN(__builtin_sve_svget_neonq_bf16, "V8yq8y", "n")
+BUILTIN(__builtin_sve_svset_neonq_s8, "q16Scq16ScV16Sc", "n")
+BUILTIN(__builtin_sve_svset_neonq_s16, "q8sq8sV8s", "n")
+BUILTIN(__builtin_sve_svset_neonq_s32, "q4iq4iV4i", "n")
+BUILTIN(__builtin_sve_svset_neonq_s64, "q2Wiq2WiV2Wi", "n")
+BUILTIN(__builtin_sve_svset_neonq_u8, "q16Ucq16UcV16Uc", "n")
+BUILTIN(__builtin_sve_svset_neonq_u16, "q8Usq8UsV8Us", "n")
+BUILTIN(__builtin_sve_svset_neonq_u32, "q4Uiq4UiV4Ui", "n")
+BUILTIN(__builtin_sve_svset_neonq_u64, "q2UWiq2UWiV2UWi", "n")
+BUILTIN(__builtin_sve_svset_neonq_f16, "q8hq8hV8h", "n")
+BUILTIN(__builtin_sve_svset_neonq_f32, "q4fq4fV4f", "n")
+BUILTIN(__builtin_sve_svset_neonq_f64, "q2dq2dV2d", "n")
+BUILTIN(__builtin_sve_svset_neonq_bf16, "q8yq8yV8y", "n")
+BUILTIN(__builtin_sve_svdup_neonq_s8, "q16ScV16Sc", "n")
+BUILTIN(__builtin_sve_svdup_neonq_s16, "q8sV8s", "n")
+BUILTIN(__builtin_sve_svdup_neonq_s32, "q4iV4i", "n")
+BUILTIN(__builtin_sve_svdup_neonq_s64, "q2WiV2Wi", "n")
+BUILTIN(__builtin_sve_svdup_neonq_u8, "q16UcV16Uc", "n")
+BUILTIN(__builtin_sve_svdup_neonq_u16, "q8UsV8Us", "n")
+BUILTIN(__builtin_sve_svdup_neonq_u32, "q4UiV4Ui", "n")
+BUILTIN(__builtin_sve_svdup_neonq_u64, "q2UWiV2UWi", "n")
+BUILTIN(__builtin_sve_svdup_neonq_f16, "q8hV8h", "n")
+BUILTIN(__builtin_sve_svdup_neonq_f32, "q4fV4f", "n")
+BUILTIN(__builtin_sve_svdup_neonq_f64, "q2dV2d", "n")
+BUILTIN(__builtin_sve_svdup_neonq_bf16, "q8yV8y", "n")
+#endif
+
Index: clang/include/clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def
===================================================================
--- /dev/null
+++ clang/include/clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def
@@ -0,0 +1,39 @@
+#ifdef GET_SVE_LLVM_INTRINSIC_MAP
+SVEMAP2(svget_neonq_s8, 1),
+SVEMAP2(svget_neonq_s16, 2),
+SVEMAP2(svget_neonq_s32, 3),
+SVEMAP2(svget_neonq_s64, 4),
+SVEMAP2(svget_neonq_u8, 1),
+SVEMAP2(svget_neonq_u16, 2),
+SVEMAP2(svget_neonq_u32, 3),
+SVEMAP2(svget_neonq_u64, 4),
+SVEMAP2(svget_neonq_f16, 5),
+SVEMAP2(svget_neonq_f32, 6),
+SVEMAP2(svget_neonq_f64, 7),
+SVEMAP2(svget_neonq_bf16, 12),
+SVEMAP2(svset_neonq_s8, 1),
+SVEMAP2(svset_neonq_s16, 2),
+SVEMAP2(svset_neonq_s32, 3),
+SVEMAP2(svset_neonq_s64, 4),
+SVEMAP2(svset_neonq_u8, 1),
+SVEMAP2(svset_neonq_u16, 2),
+SVEMAP2(svset_neonq_u32, 3),
+SVEMAP2(svset_neonq_u64, 4),
+SVEMAP2(svset_neonq_f16, 5),
+SVEMAP2(svset_neonq_f32, 6),
+SVEMAP2(svset_neonq_f64, 7),
+SVEMAP2(svset_neonq_bf16, 12),
+SVEMAP2(svdup_neonq_s8, 1),
+SVEMAP2(svdup_neonq_s16, 2),
+SVEMAP2(svdup_neonq_s32, 3),
+SVEMAP2(svdup_neonq_s64, 4),
+SVEMAP2(svdup_neonq_u8, 1),
+SVEMAP2(svdup_neonq_u16, 2),
+SVEMAP2(svdup_neonq_u32, 3),
+SVEMAP2(svdup_neonq_u64, 4),
+SVEMAP2(svdup_neonq_f16, 5),
+SVEMAP2(svdup_neonq_f32, 6),
+SVEMAP2(svdup_neonq_f64, 7),
+SVEMAP2(svdup_neonq_bf16, 12),
+#endif
+
Index: clang/include/clang/Basic/BuiltinsSVE.def
===================================================================
--- clang/include/clang/Basic/BuiltinsSVE.def
+++ clang/include/clang/Basic/BuiltinsSVE.def
@@ -15,6 +15,7 @@
 
 #define GET_SVE_BUILTINS
 #include "clang/Basic/arm_sve_builtins.inc"
+#include "clang/Basic/BuiltinsAArch64NeonSVEBridge.def"
 #undef GET_SVE_BUILTINS
 #undef BUILTIN
 
Index: clang/lib/Basic/Targets/AArch64.cpp
===================================================================
--- clang/lib/Basic/Targets/AArch64.cpp
+++ clang/lib/Basic/Targets/AArch64.cpp
@@ -307,6 +307,9 @@
   if (FPU & SveMode)
     Builder.defineMacro("__ARM_FEATURE_SVE", "1");
 
+  if ((FPU & NeonMode) && (FPU & SveMode))
+    Builder.defineMacro("__ARM_NEON_SVE_BRIDGE", "1");
+
   if (HasSVE2)
     Builder.defineMacro("__ARM_FEATURE_SVE2", "1");
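For reference, a brief usage sketch of the new feature-test macro (the helper function below is hypothetical, not part of this patch). __ARM_NEON_SVE_BRIDGE is defined only when both NEON and SVE are enabled, so portable code should guard its use of the bridge header:

#include <arm_sve.h>
#if defined(__ARM_NEON_SVE_BRIDGE)
#include <arm_neon_sve_bridge.h>

/* Hypothetical helper, only compiled when the bridge is advertised:
   read the low 128 bits of a scalable vector as a NEON vector. */
static inline int32x4_t low_128_bits(svint32_t v) {
  return svget_neonq_s32(v);
}
#endif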
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -6385,6 +6385,7 @@
 static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
 #define GET_SVE_LLVM_INTRINSIC_MAP
 #include "clang/Basic/arm_sve_builtin_cg.inc"
+#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
 #undef GET_SVE_LLVM_INTRINSIC_MAP
 };
 
@@ -9308,6 +9309,54 @@
     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
     return Builder.CreateCall(F, {V0, V1, Ops[1]});
   }
+
+  case SVE::BI__builtin_sve_svset_neonq_s8:
+  case SVE::BI__builtin_sve_svset_neonq_s16:
+  case SVE::BI__builtin_sve_svset_neonq_s32:
+  case SVE::BI__builtin_sve_svset_neonq_s64:
+  case SVE::BI__builtin_sve_svset_neonq_u8:
+  case SVE::BI__builtin_sve_svset_neonq_u16:
+  case SVE::BI__builtin_sve_svset_neonq_u32:
+  case SVE::BI__builtin_sve_svset_neonq_u64:
+  case SVE::BI__builtin_sve_svset_neonq_f16:
+  case SVE::BI__builtin_sve_svset_neonq_f32:
+  case SVE::BI__builtin_sve_svset_neonq_f64:
+  case SVE::BI__builtin_sve_svset_neonq_bf16: {
+    return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
+  }
+
+  case SVE::BI__builtin_sve_svget_neonq_s8:
+  case SVE::BI__builtin_sve_svget_neonq_s16:
+  case SVE::BI__builtin_sve_svget_neonq_s32:
+  case SVE::BI__builtin_sve_svget_neonq_s64:
+  case SVE::BI__builtin_sve_svget_neonq_u8:
+  case SVE::BI__builtin_sve_svget_neonq_u16:
+  case SVE::BI__builtin_sve_svget_neonq_u32:
+  case SVE::BI__builtin_sve_svget_neonq_u64:
+  case SVE::BI__builtin_sve_svget_neonq_f16:
+  case SVE::BI__builtin_sve_svget_neonq_f32:
+  case SVE::BI__builtin_sve_svget_neonq_f64:
+  case SVE::BI__builtin_sve_svget_neonq_bf16: {
+    return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
+  }
+
+  case SVE::BI__builtin_sve_svdup_neonq_s8:
+  case SVE::BI__builtin_sve_svdup_neonq_s16:
+  case SVE::BI__builtin_sve_svdup_neonq_s32:
+  case SVE::BI__builtin_sve_svdup_neonq_s64:
+  case SVE::BI__builtin_sve_svdup_neonq_u8:
+  case SVE::BI__builtin_sve_svdup_neonq_u16:
+  case SVE::BI__builtin_sve_svdup_neonq_u32:
+  case SVE::BI__builtin_sve_svdup_neonq_u64:
+  case SVE::BI__builtin_sve_svdup_neonq_f16:
+  case SVE::BI__builtin_sve_svdup_neonq_f32:
+  case SVE::BI__builtin_sve_svdup_neonq_f64:
+  case SVE::BI__builtin_sve_svdup_neonq_bf16: {
+    Value *Insert = Builder.CreateInsertVector(Ty, UndefValue::get(Ty), Ops[0],
+                                               Builder.getInt64(0));
+    return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
+                                   {Insert, Builder.getInt64(0)});
+  }
   }
 
   /// Should not happen
Index: clang/lib/Headers/CMakeLists.txt
===================================================================
--- clang/lib/Headers/CMakeLists.txt
+++ clang/lib/Headers/CMakeLists.txt
@@ -219,6 +219,8 @@
   clang_generate_header(-gen-arm-mve-header arm_mve.td arm_mve.h)
   # Generate arm_cde.h
   clang_generate_header(-gen-arm-cde-header arm_cde.td arm_cde.h)
+  # Copy arm_neon_sve_bridge.h
+  copy_header_to_output_dir(${CMAKE_CURRENT_SOURCE_DIR} arm_neon_sve_bridge.h)
 endif()
 if(RISCV IN_LIST LLVM_TARGETS_TO_BUILD)
   # Generate riscv_vector.h
Index: clang/lib/Headers/arm_neon_sve_bridge.h
===================================================================
--- /dev/null
+++ clang/lib/Headers/arm_neon_sve_bridge.h
@@ -0,0 +1,184 @@
+/*===---- arm_neon_sve_bridge.h - ARM NEON SVE Bridge intrinsics -----------===
+ *
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __ARM_NEON_SVE_BRIDGE_H
+#define __ARM_NEON_SVE_BRIDGE_H
+
+#include <arm_neon.h>
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Function attributes */
+#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
+#define __aio                                                                  \
+  static __inline__                                                            \
+      __attribute__((__always_inline__, __nodebug__, __overloadable__))
+
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s8)))
+svint8_t svset_neonq(svint8_t, int8x16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s16)))
+svint16_t svset_neonq(svint16_t, int16x8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s32)))
+svint32_t svset_neonq(svint32_t, int32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s64)))
+svint64_t svset_neonq(svint64_t, int64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u8)))
+svuint8_t svset_neonq(svuint8_t, uint8x16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u16)))
+svuint16_t svset_neonq(svuint16_t, uint16x8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u32)))
+svuint32_t svset_neonq(svuint32_t, uint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u64)))
+svuint64_t svset_neonq(svuint64_t, uint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f16)))
+svfloat16_t svset_neonq(svfloat16_t, float16x8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f32)))
+svfloat32_t svset_neonq(svfloat32_t, float32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f64)))
+svfloat64_t svset_neonq(svfloat64_t, float64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s8)))
+svint8_t svset_neonq_s8(svint8_t, int8x16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s16)))
+svint16_t svset_neonq_s16(svint16_t, int16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s32)))
+svint32_t svset_neonq_s32(svint32_t, int32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s64)))
+svint64_t svset_neonq_s64(svint64_t, int64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u8)))
+svuint8_t svset_neonq_u8(svuint8_t, uint8x16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u16)))
+svuint16_t svset_neonq_u16(svuint16_t, uint16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u32)))
+svuint32_t svset_neonq_u32(svuint32_t, uint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u64)))
+svuint64_t svset_neonq_u64(svuint64_t, uint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f16)))
+svfloat16_t svset_neonq_f16(svfloat16_t, float16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f32)))
+svfloat32_t svset_neonq_f32(svfloat32_t, float32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f64)))
+svfloat64_t svset_neonq_f64(svfloat64_t, float64x2_t);
+
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s8)))
+int8x16_t svget_neonq(svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s16)))
+int16x8_t svget_neonq(svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s32)))
+int32x4_t svget_neonq(svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s64)))
+int64x2_t svget_neonq(svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u8)))
+uint8x16_t svget_neonq(svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u16)))
+uint16x8_t svget_neonq(svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u32)))
+uint32x4_t svget_neonq(svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u64)))
+uint64x2_t svget_neonq(svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f16)))
+float16x8_t svget_neonq(svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f32)))
+float32x4_t svget_neonq(svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f64)))
+float64x2_t svget_neonq(svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s8)))
+int8x16_t svget_neonq_s8(svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s16)))
+int16x8_t svget_neonq_s16(svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s32)))
+int32x4_t svget_neonq_s32(svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s64)))
+int64x2_t svget_neonq_s64(svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u8)))
+uint8x16_t svget_neonq_u8(svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u16)))
+uint16x8_t svget_neonq_u16(svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u32)))
+uint32x4_t svget_neonq_u32(svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u64)))
+uint64x2_t svget_neonq_u64(svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f16)))
+float16x8_t svget_neonq_f16(svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f32)))
+float32x4_t svget_neonq_f32(svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f64)))
+float64x2_t svget_neonq_f64(svfloat64_t);
+
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s8)))
+svint8_t svdup_neonq(int8x16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s16)))
+svint16_t svdup_neonq(int16x8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s32)))
+svint32_t svdup_neonq(int32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s64)))
+svint64_t svdup_neonq(int64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u8)))
+svuint8_t svdup_neonq(uint8x16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u16)))
+svuint16_t svdup_neonq(uint16x8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u32)))
+svuint32_t svdup_neonq(uint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u64)))
+svuint64_t svdup_neonq(uint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f16)))
+svfloat16_t svdup_neonq(float16x8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f32)))
+svfloat32_t svdup_neonq(float32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f64)))
+svfloat64_t svdup_neonq(float64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s8)))
+svint8_t svdup_neonq_s8(int8x16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s16)))
+svint16_t svdup_neonq_s16(int16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s32)))
+svint32_t svdup_neonq_s32(int32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s64)))
+svint64_t svdup_neonq_s64(int64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u8)))
+svuint8_t svdup_neonq_u8(uint8x16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u16)))
+svuint16_t svdup_neonq_u16(uint16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u32)))
+svuint32_t svdup_neonq_u32(uint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u64)))
+svuint64_t svdup_neonq_u64(uint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f16)))
+svfloat16_t svdup_neonq_f16(float16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f32)))
+svfloat32_t svdup_neonq_f32(float32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f64)))
+svfloat64_t svdup_neonq_f64(float64x2_t);
+
+#if defined(__ARM_FEATURE_SVE_BF16)
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_bf16)))
+svbfloat16_t svset_neonq(svbfloat16_t, bfloat16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_bf16)))
+svbfloat16_t svset_neonq_bf16(svbfloat16_t, bfloat16x8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_bf16)))
+bfloat16x8_t svget_neonq(svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_bf16)))
+bfloat16x8_t svget_neonq_bf16(svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_bf16)))
+svbfloat16_t svdup_neonq(bfloat16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_bf16)))
+svbfloat16_t svdup_neonq_bf16(bfloat16x8_t);
+#endif // defined(__ARM_FEATURE_SVE_BF16)
+
+#undef __ai
+#undef __aio
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif //__ARM_NEON_SVE_BRIDGE_H
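As a usage sketch (not part of the patch): every intrinsic in this header comes in an overloaded spelling (svget_neonq, svset_neonq, svdup_neonq) and a type-suffixed spelling (e.g. svget_neonq_s32). The example function below is illustrative only:

#include <arm_neon_sve_bridge.h>

void bridge_examples(svint32_t s, int32x4_t n) {
  /* svget_neonq: read the low 128 bits of an SVE vector as a NEON vector. */
  int32x4_t lo = svget_neonq_s32(s);
  /* svset_neonq: write a NEON vector into the low 128 bits of an SVE
     vector; the remaining lanes are unchanged. */
  svint32_t ins = svset_neonq(s, n); /* overloaded spelling */
  /* svdup_neonq: broadcast a NEON vector to every 128-bit block. */
  svint32_t dup = svdup_neonq_s32(n);
  (void)lo; (void)ins; (void)dup;
}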
Index: clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_dup_neonq.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_dup_neonq.c
@@ -0,0 +1,207 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+#include <arm_neon_sve_bridge.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: @test_svdup_neonq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svdup_neonq_s811__Int8x16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+//
+svint8_t test_svdup_neonq_s8(int8x16_t n) {
+  return SVE_ACLE_FUNC(svdup_neonq, _s8, , )(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_s1611__Int16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svdup_neonq_s16(int16x8_t n) {
+  return SVE_ACLE_FUNC(svdup_neonq, _s16, , )(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_s3211__Int32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+svint32_t test_svdup_neonq_s32(int32x4_t n) {
+  return SVE_ACLE_FUNC(svdup_neonq, _s32, , )(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_s6411__Int64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+svint64_t test_svdup_neonq_s64(int64x2_t n) {
+  return SVE_ACLE_FUNC(svdup_neonq, _s64, , )(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svdup_neonq_u812__Uint8x16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+//
+svuint8_t test_svdup_neonq_u8(uint8x16_t n) {
+  return SVE_ACLE_FUNC(svdup_neonq, _u8, , )(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_u1612__Uint16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svdup_neonq_u16(uint16x8_t n) {
+  return SVE_ACLE_FUNC(svdup_neonq, _u16, , )(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_u3212__Uint32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_svdup_neonq_u32(uint32x4_t n) {
+  return SVE_ACLE_FUNC(svdup_neonq, _u32, , )(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_u6412__Uint64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+svuint64_t test_svdup_neonq_u64(uint64x2_t n) {
+  return SVE_ACLE_FUNC(svdup_neonq, _u64, , )(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> undef, <8 x half> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_f1613__Float16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> undef, <8 x half> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svdup_neonq_f16(float16x8_t n) {
+  return SVE_ACLE_FUNC(svdup_neonq, _f16, , )(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_f3213__Float32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svdup_neonq_f32(float32x4_t n) {
+  return SVE_ACLE_FUNC(svdup_neonq, _f32, , )(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> undef, <2 x double> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_f6413__Float64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> undef, <2 x double> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_svdup_neonq_f64(float64x2_t n) {
+  return SVE_ACLE_FUNC(svdup_neonq, _f64, , )(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> undef, <8 x bfloat> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svdup_neonq_bf1614__Bfloat16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> undef, <8 x bfloat> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svdup_neonq_bf16(bfloat16x8_t n) {
+  return SVE_ACLE_FUNC(svdup_neonq, _bf16, , )(n);
+}
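The SVE_ACLE_FUNC macro used by these tests selects between the two spellings of each intrinsic, so the same source exercises both; for example:

/* Without SVE_OVERLOADED_FORMS:
 *   SVE_ACLE_FUNC(svdup_neonq, _s8, , )(n)  expands to  svdup_neonq_s8(n)
 * With SVE_OVERLOADED_FORMS:
 *   SVE_ACLE_FUNC(svdup_neonq, _s8, , )(n)  expands to  svdup_neonq(n)
 */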
Index: clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c
@@ -0,0 +1,183 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+#include <arm_neon_sve_bridge.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: @test_svget_neonq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svget_neonq_s8u10__SVInt8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_svget_neonq_s8(svint8_t n) {
+  return SVE_ACLE_FUNC(svget_neonq, _s8, , )(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_s16u11__SVInt16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_svget_neonq_s16(svint16_t n) {
+  return SVE_ACLE_FUNC(svget_neonq, _s16, , )(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_s32u11__SVInt32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_svget_neonq_s32(svint32_t n) {
+  return SVE_ACLE_FUNC(svget_neonq, _s32, , )(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_s64u11__SVInt64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+int64x2_t test_svget_neonq_s64(svint64_t n) {
+  return SVE_ACLE_FUNC(svget_neonq, _s64, , )(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svget_neonq_u8u11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_svget_neonq_u8(svuint8_t n) {
+  return SVE_ACLE_FUNC(svget_neonq, _u8, , )(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_u16u12__SVUint16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_svget_neonq_u16(svuint16_t n) {
+  return SVE_ACLE_FUNC(svget_neonq, _u16, , )(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_u32u12__SVUint32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_svget_neonq_u32(svuint32_t n) {
+  return SVE_ACLE_FUNC(svget_neonq, _u32, , )(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_u64u12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+uint64x2_t test_svget_neonq_u64(svuint64_t n) {
+  return SVE_ACLE_FUNC(svget_neonq, _u64, , )(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.experimental.vector.extract.v8f16.nxv8f16(<vscale x 8 x half> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_f16u13__SVFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.experimental.vector.extract.v8f16.nxv8f16(<vscale x 8 x half> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_svget_neonq_f16(svfloat16_t n) {
+  return SVE_ACLE_FUNC(svget_neonq, _f16, , )(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.experimental.vector.extract.v4f32.nxv4f32(<vscale x 4 x float> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_f32u13__SVFloat32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.experimental.vector.extract.v4f32.nxv4f32(<vscale x 4 x float> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_svget_neonq_f32(svfloat32_t n) {
+  return SVE_ACLE_FUNC(svget_neonq, _f32, , )(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <2 x double> @llvm.experimental.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_f64u13__SVFloat64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <2 x double> @llvm.experimental.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <2 x double> [[TMP0]]
+//
+float64x2_t test_svget_neonq_f64(svfloat64_t n) {
+  return SVE_ACLE_FUNC(svget_neonq, _f64, , )(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x bfloat> @llvm.experimental.vector.extract.v8bf16.nxv8bf16(<vscale x 8 x bfloat> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svget_neonq_bf16u14__SVBFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <8 x bfloat> @llvm.experimental.vector.extract.v8bf16.nxv8bf16(<vscale x 8 x bfloat> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <8 x bfloat> [[TMP0]]
+//
+bfloat16x8_t test_svget_neonq_bf16(svbfloat16_t n) {
+  return SVE_ACLE_FUNC(svget_neonq, _bf16, , )(n);
+}
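A sketch of the get/set round trip these tests imply (the helper below is illustrative, not from the patch): a NEON-only operation can be applied to the low 128 bits of an SVE vector while leaving all higher lanes intact.

#include <arm_neon_sve_bridge.h>

static inline svuint8_t reverse_low_block(svuint8_t v) {
  uint8x16_t lo = svget_neonq_u8(v); /* extract the low 128 bits */
  lo = vrev64q_u8(lo);               /* any NEON-only operation */
  return svset_neonq_u8(v, lo);      /* reinsert; upper lanes preserved */
}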
Index: clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_set_neonq.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_set_neonq.c
@@ -0,0 +1,183 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+#include <arm_neon_sve_bridge.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: @test_svset_neonq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svset_neonq_s8u10__SVInt8_t11__Int8x16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svset_neonq_s8(svint8_t s, int8x16_t n) {
+  return SVE_ACLE_FUNC(svset_neonq, _s8, , )(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_s16u11__SVInt16_t11__Int16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svset_neonq_s16(svint16_t s, int16x8_t n) {
+  return SVE_ACLE_FUNC(svset_neonq, _s16, , )(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_s32u11__SVInt32_t11__Int32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svset_neonq_s32(svint32_t s, int32x4_t n) {
+  return SVE_ACLE_FUNC(svset_neonq, _s32, , )(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_s64u11__SVInt64_t11__Int64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svset_neonq_s64(svint64_t s, int64x2_t n) {
+  return SVE_ACLE_FUNC(svset_neonq, _s64, , )(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svset_neonq_u8u11__SVUint8_t12__Uint8x16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svset_neonq_u8(svuint8_t s, uint8x16_t n) {
+  return SVE_ACLE_FUNC(svset_neonq, _u8, , )(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_u16u12__SVUint16_t12__Uint16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svset_neonq_u16(svuint16_t s, uint16x8_t n) {
+  return SVE_ACLE_FUNC(svset_neonq, _u16, , )(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_u32u12__SVUint32_t12__Uint32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svset_neonq_u32(svuint32_t s, uint32x4_t n) {
+  return SVE_ACLE_FUNC(svset_neonq, _u32, , )(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_u64u12__SVUint64_t12__Uint64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svset_neonq_u64(svuint64_t s, uint64x2_t n) {
+  return SVE_ACLE_FUNC(svset_neonq, _u64, , )(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> [[S:%.*]], <8 x half> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_f16u13__SVFloat16_t13__Float16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> [[S:%.*]], <8 x half> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svset_neonq_f16(svfloat16_t s, float16x8_t n) {
+  return SVE_ACLE_FUNC(svset_neonq, _f16, , )(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> [[S:%.*]], <4 x float> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_f32u13__SVFloat32_t13__Float32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> [[S:%.*]], <4 x float> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svset_neonq_f32(svfloat32_t s, float32x4_t n) {
+  return SVE_ACLE_FUNC(svset_neonq, _f32, , )(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> [[S:%.*]], <2 x double> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_f64u13__SVFloat64_t13__Float64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> [[S:%.*]], <2 x double> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svset_neonq_f64(svfloat64_t s, float64x2_t n) {
+  return SVE_ACLE_FUNC(svset_neonq, _f64, , )(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> [[S:%.*]], <8 x bfloat> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svset_neonq_bf16u14__SVBFloat16_t14__Bfloat16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> [[S:%.*]], <8 x bfloat> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svset_neonq_bf16(svbfloat16_t s, bfloat16x8_t n) {
+  return SVE_ACLE_FUNC(svset_neonq, _bf16, , )(s, n);
+}
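To summarize the three lowerings exercised above (illustrative values, assuming a 256-bit SVE register viewed as two 128-bit blocks {a0, a1} and a NEON vector n):

/* svset_neonq(v, n) -> {n, a1} : llvm.experimental.vector.insert at index 0
 * svget_neonq(v)    -> a0      : llvm.experimental.vector.extract at index 0
 * svdup_neonq(n)    -> {n, n}  : vector.insert into undef, then
 *                                llvm.aarch64.sve.dupq.lane with index 0
 */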