diff --git a/clang/include/clang/Basic/BuiltinsAArch64NeonSVEBridge.def b/clang/include/clang/Basic/BuiltinsAArch64NeonSVEBridge.def
new file mode 100644
--- /dev/null
+++ b/clang/include/clang/Basic/BuiltinsAArch64NeonSVEBridge.def
@@ -0,0 +1,39 @@
+#ifdef GET_SVE_BUILTINS
+BUILTIN(__builtin_sve_svget_neonq_s8, "V16Scq16Sc", "n")
+BUILTIN(__builtin_sve_svget_neonq_s16, "V8sq8s", "n")
+BUILTIN(__builtin_sve_svget_neonq_s32, "V4iq4i", "n")
+BUILTIN(__builtin_sve_svget_neonq_s64, "V2Wiq2Wi", "n")
+BUILTIN(__builtin_sve_svget_neonq_u8, "V16Ucq16Uc", "n")
+BUILTIN(__builtin_sve_svget_neonq_u16, "V8Usq8Us", "n")
+BUILTIN(__builtin_sve_svget_neonq_u32, "V4Uiq4Ui", "n")
+BUILTIN(__builtin_sve_svget_neonq_u64, "V2UWiq2UWi", "n")
+BUILTIN(__builtin_sve_svget_neonq_f16, "V8hq8h", "n")
+BUILTIN(__builtin_sve_svget_neonq_f32, "V4fq4f", "n")
+BUILTIN(__builtin_sve_svget_neonq_f64, "V2dq2d", "n")
+BUILTIN(__builtin_sve_svget_neonq_bf16, "V8yq8y", "n")
+BUILTIN(__builtin_sve_svset_neonq_s8, "q16Scq16ScV16Sc", "n")
+BUILTIN(__builtin_sve_svset_neonq_s16, "q8sq8sV8s", "n")
+BUILTIN(__builtin_sve_svset_neonq_s32, "q4iq4iV4i", "n")
+BUILTIN(__builtin_sve_svset_neonq_s64, "q2Wiq2WiV2Wi", "n")
+BUILTIN(__builtin_sve_svset_neonq_u8, "q16Ucq16UcV16Uc", "n")
+BUILTIN(__builtin_sve_svset_neonq_u16, "q8Usq8UsV8Us", "n")
+BUILTIN(__builtin_sve_svset_neonq_u32, "q4Uiq4UiV4Ui", "n")
+BUILTIN(__builtin_sve_svset_neonq_u64, "q2UWiq2UWiV2UWi", "n")
+BUILTIN(__builtin_sve_svset_neonq_f16, "q8hq8hV8h", "n")
+BUILTIN(__builtin_sve_svset_neonq_f32, "q4fq4fV4f", "n")
+BUILTIN(__builtin_sve_svset_neonq_f64, "q2dq2dV2d", "n")
+BUILTIN(__builtin_sve_svset_neonq_bf16, "q8yq8yV8y", "n")
+BUILTIN(__builtin_sve_svdup_neonq_s8, "q16ScV16Sc", "n")
+BUILTIN(__builtin_sve_svdup_neonq_s16, "q8sV8s", "n")
+BUILTIN(__builtin_sve_svdup_neonq_s32, "q4iV4i", "n")
+BUILTIN(__builtin_sve_svdup_neonq_s64, "q2WiV2Wi", "n")
+BUILTIN(__builtin_sve_svdup_neonq_u8, "q16UcV16Uc", "n")
+BUILTIN(__builtin_sve_svdup_neonq_u16, "q8UsV8Us", "n")
+BUILTIN(__builtin_sve_svdup_neonq_u32, "q4UiV4Ui", "n")
+BUILTIN(__builtin_sve_svdup_neonq_u64, "q2UWiV2UWi", "n")
+BUILTIN(__builtin_sve_svdup_neonq_f16, "q8hV8h", "n")
+BUILTIN(__builtin_sve_svdup_neonq_f32, "q4fV4f", "n")
+BUILTIN(__builtin_sve_svdup_neonq_f64, "q2dV2d", "n")
+BUILTIN(__builtin_sve_svdup_neonq_bf16, "q8yV8y", "n")
+#endif
+
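Note: the prototype strings above were de-duplicated against Clang's builtin type-string encoding, in which "q<N><elt>" spells a scalable SVE vector, "V<N><elt>" a fixed-width NEON vector, "Sc"/"Uc" signed/unsigned char, "s"/"Us" (unsigned) short, "Wi"/"UWi" (unsigned) 64-bit int, "h"/"f"/"d" half/float/double, "y" bfloat16, and the final "n" marks the builtin nothrow; treat the exact spellings as a best-effort reconstruction rather than verbatim review content. A worked decode of two entries (annotations ours):

    BUILTIN(__builtin_sve_svget_neonq_s8, "V16Scq16Sc", "n")
    /* returns V16Sc = int8x16_t, takes q16Sc = svint8_t */
    BUILTIN(__builtin_sve_svset_neonq_f32, "q4fq4fV4f", "n")
    /* returns q4f = svfloat32_t, takes (svfloat32_t, float32x4_t) */
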
diff --git a/clang/include/clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def b/clang/include/clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def
new file mode 100644
--- /dev/null
+++ b/clang/include/clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def
@@ -0,0 +1,39 @@
+#ifdef GET_SVE_LLVM_INTRINSIC_MAP
+SVEMAP2(svget_neonq_s8, 1),
+SVEMAP2(svget_neonq_s16, 2),
+SVEMAP2(svget_neonq_s32, 3),
+SVEMAP2(svget_neonq_s64, 4),
+SVEMAP2(svget_neonq_u8, 1),
+SVEMAP2(svget_neonq_u16, 2),
+SVEMAP2(svget_neonq_u32, 3),
+SVEMAP2(svget_neonq_u64, 4),
+SVEMAP2(svget_neonq_f16, 1),
+SVEMAP2(svget_neonq_f32, 2),
+SVEMAP2(svget_neonq_f64, 3),
+SVEMAP2(svget_neonq_bf16, 1),
+SVEMAP2(svset_neonq_s8, 1),
+SVEMAP2(svset_neonq_s16, 2),
+SVEMAP2(svset_neonq_s32, 3),
+SVEMAP2(svset_neonq_s64, 4),
+SVEMAP2(svset_neonq_u8, 1),
+SVEMAP2(svset_neonq_u16, 2),
+SVEMAP2(svset_neonq_u32, 3),
+SVEMAP2(svset_neonq_u64, 4),
+SVEMAP2(svset_neonq_f16, 1),
+SVEMAP2(svset_neonq_f32, 2),
+SVEMAP2(svset_neonq_f64, 3),
+SVEMAP2(svset_neonq_bf16, 1),
+SVEMAP2(svdup_neonq_s8, 1),
+SVEMAP2(svdup_neonq_s16, 2),
+SVEMAP2(svdup_neonq_s32, 3),
+SVEMAP2(svdup_neonq_s64, 4),
+SVEMAP2(svdup_neonq_u8, 1),
+SVEMAP2(svdup_neonq_u16, 2),
+SVEMAP2(svdup_neonq_u32, 3),
+SVEMAP2(svdup_neonq_u64, 4),
+SVEMAP2(svdup_neonq_f16, 1),
+SVEMAP2(svdup_neonq_f32, 2),
+SVEMAP2(svdup_neonq_f64, 3),
+SVEMAP2(svdup_neonq_bf16, 1),
+#endif
+
diff --git a/clang/include/clang/Basic/BuiltinsSVE.def b/clang/include/clang/Basic/BuiltinsSVE.def
--- a/clang/include/clang/Basic/BuiltinsSVE.def
+++ b/clang/include/clang/Basic/BuiltinsSVE.def
@@ -15,6 +15,7 @@
 #define GET_SVE_BUILTINS
 #include "clang/Basic/arm_sve_builtins.inc"
+#include "clang/Basic/BuiltinsAArch64NeonSVEBridge.def"
 #undef GET_SVE_BUILTINS
 #undef BUILTIN

diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -307,6 +307,9 @@
   if (FPU & SveMode)
     Builder.defineMacro("__ARM_FEATURE_SVE", "1");

+  if ((FPU & NeonMode) && (FPU & SveMode))
+    Builder.defineMacro("__ARM_NEON_SVE_BRIDGE", "1");
+
   if (HasSVE2)
     Builder.defineMacro("__ARM_FEATURE_SVE2", "1");

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6384,6 +6384,7 @@
 static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
 #define GET_SVE_LLVM_INTRINSIC_MAP
 #include "clang/Basic/arm_sve_builtin_cg.inc"
+#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
 #undef GET_SVE_LLVM_INTRINSIC_MAP
 };

@@ -9307,6 +9308,52 @@
     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
     return Builder.CreateCall(F, {V0, V1, Ops[1]});
   }
+
+  case SVE::BI__builtin_sve_svset_neonq_s8:
+  case SVE::BI__builtin_sve_svset_neonq_s16:
+  case SVE::BI__builtin_sve_svset_neonq_s32:
+  case SVE::BI__builtin_sve_svset_neonq_s64:
+  case SVE::BI__builtin_sve_svset_neonq_u8:
+  case SVE::BI__builtin_sve_svset_neonq_u16:
+  case SVE::BI__builtin_sve_svset_neonq_u32:
+  case SVE::BI__builtin_sve_svset_neonq_u64:
+  case SVE::BI__builtin_sve_svset_neonq_f16:
+  case SVE::BI__builtin_sve_svset_neonq_f32:
+  case SVE::BI__builtin_sve_svset_neonq_f64:
+  case SVE::BI__builtin_sve_svset_neonq_bf16: {
+    return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
+  }
+
+  case SVE::BI__builtin_sve_svget_neonq_s8:
+  case SVE::BI__builtin_sve_svget_neonq_s16:
+  case SVE::BI__builtin_sve_svget_neonq_s32:
+  case SVE::BI__builtin_sve_svget_neonq_s64:
+  case SVE::BI__builtin_sve_svget_neonq_u8:
+  case SVE::BI__builtin_sve_svget_neonq_u16:
+  case SVE::BI__builtin_sve_svget_neonq_u32:
+  case SVE::BI__builtin_sve_svget_neonq_u64:
+  case SVE::BI__builtin_sve_svget_neonq_f16:
+  case SVE::BI__builtin_sve_svget_neonq_f32:
+  case SVE::BI__builtin_sve_svget_neonq_f64:
+  case SVE::BI__builtin_sve_svget_neonq_bf16: {
+    return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
+  }
+
+  case SVE::BI__builtin_sve_svdup_neonq_s8:
+  case SVE::BI__builtin_sve_svdup_neonq_s16:
+  case SVE::BI__builtin_sve_svdup_neonq_s32:
+  case SVE::BI__builtin_sve_svdup_neonq_s64:
+  case SVE::BI__builtin_sve_svdup_neonq_u8:
+  case SVE::BI__builtin_sve_svdup_neonq_u16:
+  case SVE::BI__builtin_sve_svdup_neonq_u32:
+  case SVE::BI__builtin_sve_svdup_neonq_u64:
+  case SVE::BI__builtin_sve_svdup_neonq_f16:
+  case SVE::BI__builtin_sve_svdup_neonq_f32:
+  case SVE::BI__builtin_sve_svdup_neonq_f64:
+  case SVE::BI__builtin_sve_svdup_neonq_bf16: {
+    Value *Insert = Builder.CreateInsertVector(Ty, UndefValue::get(Ty), Ops[0],
+                                               Builder.getInt64(0));
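For context on the map entries above: SVEMAP2 describes builtins that have no directly attached LLVM intrinsic, so EmitAArch64SVEBuiltinExpr falls through to the explicit switch cases added in this patch. A rough sketch of the macro's shape (an assumption about CGBuiltin.cpp at this revision, not verbatim from the patch):

    // Sketch: name, builtin ID, no modified/unmodified intrinsic IDs, type flags.
    #define SVEMAP2(NameBase, TypeModifier) \
      { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
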
+    return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
+                                   {Insert, Builder.getInt64(0)});
+  }
 }

 /// Should not happen
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -219,6 +219,8 @@
   clang_generate_header(-gen-arm-mve-header arm_mve.td arm_mve.h)
   # Generate arm_cde.h
   clang_generate_header(-gen-arm-cde-header arm_cde.td arm_cde.h)
+  # Copy arm_neon_sve_bridge.h
+  copy_header_to_output_dir(${CMAKE_CURRENT_SOURCE_DIR} arm_neon_sve_bridge.h)
 endif()
 if(RISCV IN_LIST LLVM_TARGETS_TO_BUILD)
   # Generate riscv_vector.h
diff --git a/clang/lib/Headers/arm_neon_sve_bridge.h b/clang/lib/Headers/arm_neon_sve_bridge.h
new file mode 100644
--- /dev/null
+++ b/clang/lib/Headers/arm_neon_sve_bridge.h
@@ -0,0 +1,103 @@
+/*===---- arm_neon_sve_bridge.h - ARM NEON SVE Bridge intrinsics -----------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __ARM_NEON_SVE_BRIDGE_H
+#define __ARM_NEON_SVE_BRIDGE_H
+
+#include <arm_neon.h>
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Function attributes */
+#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
+
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s8)))
+svint8_t svset_neonq_s8(svint8_t, int8x16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s16)))
+svint16_t svset_neonq_s16(svint16_t, int16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s32)))
+svint32_t svset_neonq_s32(svint32_t, int32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s64)))
+svint64_t svset_neonq_s64(svint64_t, int64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u8)))
+svuint8_t svset_neonq_u8(svuint8_t, uint8x16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u16)))
+svuint16_t svset_neonq_u16(svuint16_t, uint16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u32)))
+svuint32_t svset_neonq_u32(svuint32_t, uint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u64)))
+svuint64_t svset_neonq_u64(svuint64_t, uint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f16)))
+svfloat16_t svset_neonq_f16(svfloat16_t, float16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f32)))
+svfloat32_t svset_neonq_f32(svfloat32_t, float32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f64)))
+svfloat64_t svset_neonq_f64(svfloat64_t, float64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s8)))
+int8x16_t svget_neonq_s8(svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s16)))
+int16x8_t svget_neonq_s16(svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s32)))
+int32x4_t svget_neonq_s32(svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s64)))
+int64x2_t svget_neonq_s64(svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u8)))
+uint8x16_t svget_neonq_u8(svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u16)))
+uint16x8_t svget_neonq_u16(svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u32)))
+uint32x4_t svget_neonq_u32(svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u64)))
+uint64x2_t svget_neonq_u64(svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f16)))
+float16x8_t svget_neonq_f16(svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f32)))
+float32x4_t svget_neonq_f32(svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f64)))
+float64x2_t svget_neonq_f64(svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s8)))
+svint8_t svdup_neonq_s8(int8x16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s16)))
+svint16_t svdup_neonq_s16(int16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s32)))
+svint32_t svdup_neonq_s32(int32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s64)))
+svint64_t svdup_neonq_s64(int64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u8)))
+svuint8_t svdup_neonq_u8(uint8x16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u16)))
+svuint16_t svdup_neonq_u16(uint16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u32)))
+svuint32_t svdup_neonq_u32(uint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u64)))
+svuint64_t svdup_neonq_u64(uint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f16)))
+svfloat16_t svdup_neonq_f16(float16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f32)))
+svfloat32_t svdup_neonq_f32(float32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f64)))
+svfloat64_t svdup_neonq_f64(float64x2_t);
+#if defined(__ARM_FEATURE_SVE2) && defined(__ARM_FEATURE_SVE_BF16)
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_bf16)))
+svbfloat16_t svset_neonq_bf16(svbfloat16_t, bfloat16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_bf16)))
+bfloat16x8_t svget_neonq_bf16(svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_bf16)))
+svbfloat16_t svdup_neonq_bf16(bfloat16x8_t);
+#endif // defined(__ARM_FEATURE_SVE2) && defined(__ARM_FEATURE_SVE_BF16)
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif //__ARM_NEON_SVE_BRIDGE_H
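Taken together, the header gives strictly typed conversions between the NEON 128-bit vector types and the low 128 bits of SVE vectors. A small usage sketch (function and variable names are ours, not part of the patch):

    #include <arm_neon_sve_bridge.h>

    /* Apply a fixed-width NEON add to the low 128 bits of two SVE vectors,
       then write the sum back into the low quadword of a. */
    svuint8_t add_low_quadword(svuint8_t a, svuint8_t b) {
      uint8x16_t an = svget_neonq_u8(a);  /* low 128 bits of a, as NEON */
      uint8x16_t bn = svget_neonq_u8(b);  /* low 128 bits of b */
      uint8x16_t sum = vaddq_u8(an, bn);  /* plain NEON add */
      return svset_neonq_u8(a, sum);      /* a, low quadword replaced */
    }
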
diff --git a/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_dup_neonq.c b/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_dup_neonq.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_dup_neonq.c
@@ -0,0 +1,215 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o /dev/null %s
+#include <arm_neon_sve_bridge.h>
+
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+
+// CHECK-LABEL: @test_svdup_neonq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svdup_neonq_s811__Int8x16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+//
+svint8_t test_svdup_neonq_s8(int8x16_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_s8,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_s1611__Int16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svdup_neonq_s16(int16x8_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_s16,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_s3211__Int32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+svint32_t test_svdup_neonq_s32(int32x4_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_s32,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_s6411__Int64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+svint64_t test_svdup_neonq_s64(int64x2_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_s64,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svdup_neonq_u812__Uint8x16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+//
+svuint8_t test_svdup_neonq_u8(uint8x16_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_u8,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_u1612__Uint16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svdup_neonq_u16(uint16x8_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_u16,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_u3212__Uint32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_svdup_neonq_u32(uint32x4_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_u32,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_u6412__Uint64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+svuint64_t test_svdup_neonq_u64(uint64x2_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_u64,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> undef, <8 x half> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_f1613__Float16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> undef, <8 x half> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svdup_neonq_f16(float16x8_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_f16,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_f3213__Float32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svdup_neonq_f32(float32x4_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_f32,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> undef, <2 x double> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_f6413__Float64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> undef, <2 x double> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_svdup_neonq_f64(float64x2_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_f64,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> undef, <8 x bfloat> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svdup_neonq_bf1614__Bfloat16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> undef, <8 x bfloat> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svdup_neonq_bf16(bfloat16x8_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_bf16,,)(n);
+}
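As the checks above show, svdup_neonq lowers to a vector insert at offset 0 followed by aarch64.sve.dupq.lane with index 0; that is, the whole 128-bit NEON value is broadcast to every quadword of the scalable result. A small sketch of the semantics (function name is ours):

    #include <arm_neon_sve_bridge.h>

    /* On a 256-bit SVE implementation the result holds n twice; on
       512-bit hardware, four times, and so on. */
    svfloat32_t splat_quadword(float32x4_t n) {
      return svdup_neonq_f32(n);
    }
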
diff --git a/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c b/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c
@@ -0,0 +1,189 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o /dev/null %s
+#include <arm_neon_sve_bridge.h>
+
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+
+// CHECK-LABEL: @test_svget_neonq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svget_neonq_s8u10__SVInt8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_svget_neonq_s8(svint8_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_s8,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_s16u11__SVInt16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_svget_neonq_s16(svint16_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_s16,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_s32u11__SVInt32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_svget_neonq_s32(svint32_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_s32,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_s64u11__SVInt64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+int64x2_t test_svget_neonq_s64(svint64_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_s64,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svget_neonq_u8u11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_svget_neonq_u8(svuint8_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_u8,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_u16u12__SVUint16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_svget_neonq_u16(svuint16_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_u16,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_u32u12__SVUint32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_svget_neonq_u32(svuint32_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_u32,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_u64u12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+uint64x2_t test_svget_neonq_u64(svuint64_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_u64,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.experimental.vector.extract.v8f16.nxv8f16(<vscale x 8 x half> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_f16u13__SVFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.experimental.vector.extract.v8f16.nxv8f16(<vscale x 8 x half> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_svget_neonq_f16(svfloat16_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_f16,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.experimental.vector.extract.v4f32.nxv4f32(<vscale x 4 x float> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_f32u13__SVFloat32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.experimental.vector.extract.v4f32.nxv4f32(<vscale x 4 x float> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_svget_neonq_f32(svfloat32_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_f32,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <2 x double> @llvm.experimental.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_f64u13__SVFloat64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <2 x double> @llvm.experimental.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <2 x double> [[TMP0]]
+//
+float64x2_t test_svget_neonq_f64(svfloat64_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_f64,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x bfloat> @llvm.experimental.vector.extract.v8bf16.nxv8bf16(<vscale x 8 x bfloat> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svget_neonq_bf16u14__SVBFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <8 x bfloat> @llvm.experimental.vector.extract.v8bf16.nxv8bf16(<vscale x 8 x bfloat> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <8 x bfloat> [[TMP0]]
+//
+bfloat16x8_t test_svget_neonq_bf16(svbfloat16_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_bf16,,)(n);
+}
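svget_neonq is the inverse of svset_neonq at offset 0, so a set followed by a get of the same quadword should fold away. A sketch of that round-trip (function name is ours; the folding is an expectation at -O1, not something this patch tests directly):

    #include <arm_neon_sve_bridge.h>

    /* Insert n into the low quadword of s, then read it straight back;
       one would expect the extract to fold to n after optimization. */
    int32x4_t roundtrip(svint32_t s, int32x4_t n) {
      return svget_neonq_s32(svset_neonq_s32(s, n));
    }
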
diff --git a/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_set_neonq.c b/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_set_neonq.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_set_neonq.c
@@ -0,0 +1,188 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o /dev/null %s
+#include <arm_neon_sve_bridge.h>
+
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+
+// CHECK-LABEL: @test_svset_neonq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svset_neonq_s8u10__SVInt8_t11__Int8x16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svset_neonq_s8(svint8_t s, int8x16_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_s8,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_s16u11__SVInt16_t11__Int16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svset_neonq_s16(svint16_t s, int16x8_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_s16,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_s32u11__SVInt32_t11__Int32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svset_neonq_s32(svint32_t s, int32x4_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_s32,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_s64u11__SVInt64_t11__Int64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svset_neonq_s64(svint64_t s, int64x2_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_s64,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svset_neonq_u8u11__SVUint8_t12__Uint8x16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svset_neonq_u8(svuint8_t s, uint8x16_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_u8,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_u16u12__SVUint16_t12__Uint16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svset_neonq_u16(svuint16_t s, uint16x8_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_u16,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_u32u12__SVUint32_t12__Uint32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svset_neonq_u32(svuint32_t s, uint32x4_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_u32,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_u64u12__SVUint64_t12__Uint64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svset_neonq_u64(svuint64_t s, uint64x2_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_u64,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> [[S:%.*]], <8 x half> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_f16u13__SVFloat16_t13__Float16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> [[S:%.*]], <8 x half> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svset_neonq_f16(svfloat16_t s, float16x8_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_f16,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> [[S:%.*]], <4 x float> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_f32u13__SVFloat32_t13__Float32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> [[S:%.*]], <4 x float> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svset_neonq_f32(svfloat32_t s, float32x4_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_f32,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> [[S:%.*]], <2 x double> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_f64u13__SVFloat64_t13__Float64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> [[S:%.*]], <2 x double> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svset_neonq_f64(svfloat64_t s, float64x2_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_f64,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> [[S:%.*]], <8 x bfloat> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svset_neonq_bf16u14__SVBFloat16_t14__Bfloat16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> [[S:%.*]], <8 x bfloat> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svset_neonq_bf16(svbfloat16_t s, bfloat16x8_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_bf16,,)(s, n);
+}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1322,6 +1322,7 @@
       setOperationAction(ISD::MGATHER, VT, Custom);
       setOperationAction(ISD::MSCATTER, VT, Custom);
       setOperationAction(ISD::MLOAD, VT, Custom);
+      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
     }

     setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
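The AArch64ISelLowering change marks ISD::INSERT_SUBVECTOR as Custom for the vector types in this loop, which, given the surrounding nxv8bf16 context, appears to cover the scalable bf16 types; this lets the inserts produced by svset_neonq_bf16 and svdup_neonq_bf16 be legalized. A bf16 usage sketch (function name is ours; per the header it is only available under the bf16 feature guard):

    #include <arm_neon_sve_bridge.h>

    #if defined(__ARM_FEATURE_SVE2) && defined(__ARM_FEATURE_SVE_BF16)
    /* Produces the nxv8bf16 INSERT_SUBVECTOR node that the new Custom
       operation action handles. */
    svbfloat16_t set_bf16_low(svbfloat16_t s, bfloat16x8_t n) {
      return svset_neonq_bf16(s, n);
    }
    #endif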