Index: clang/include/clang/Basic/arm_neon.td
===================================================================
--- clang/include/clang/Basic/arm_neon.td
+++ clang/include/clang/Basic/arm_neon.td
@@ -1964,7 +1964,7 @@
 }
 
 // v8.3-A Vector complex addition intrinsics
-let ArchGuard = "defined(__ARM_FEATURE_COMPLEX)", TargetGuard = "fullfp16" in {
+let TargetGuard = "v8.3a,fullfp16" in {
   def VCADD_ROT90_FP16 : SInst<"vcadd_rot90", "...", "h">;
   def VCADD_ROT270_FP16 : SInst<"vcadd_rot270", "...", "h">;
   def VCADDQ_ROT90_FP16 : SInst<"vcaddq_rot90", "QQQ", "h">;
@@ -1972,7 +1972,7 @@
 
   defm VCMLA_FP16 : VCMLA_ROTS<"h", "uint32x2_t", "uint32x4_t">;
 }
-let ArchGuard = "defined(__ARM_FEATURE_COMPLEX)" in {
+let TargetGuard = "v8.3a" in {
   def VCADD_ROT90 : SInst<"vcadd_rot90", "...", "f">;
   def VCADD_ROT270 : SInst<"vcadd_rot270", "...", "f">;
   def VCADDQ_ROT90 : SInst<"vcaddq_rot90", "QQQ", "f">;
@@ -1980,7 +1980,7 @@
 
   defm VCMLA_F32 : VCMLA_ROTS<"f", "uint64x1_t", "uint64x2_t">;
 }
-let ArchGuard = "defined(__ARM_FEATURE_COMPLEX) && defined(__aarch64__)" in {
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.3a" in {
   def VCADDQ_ROT90_FP64 : SInst<"vcaddq_rot90", "QQQ", "d">;
   def VCADDQ_ROT270_FP64 : SInst<"vcaddq_rot270", "QQQ", "d">;
 
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -5653,10 +5653,16 @@
   NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
   NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
   NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
-  NEONMAP1(vcadd_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
-  NEONMAP1(vcadd_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
-  NEONMAP1(vcaddq_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
-  NEONMAP1(vcaddq_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
+  NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
+  NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
+  NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
+  NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
+  NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
   NEONMAP1(vcage_v, arm_neon_vacge, 0),
   NEONMAP1(vcageq_v, arm_neon_vacge, 0),
   NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
@@ -5971,10 +5977,16 @@
   NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
   NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
   NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
-  NEONMAP1(vcadd_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
-  NEONMAP1(vcadd_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
-  NEONMAP1(vcaddq_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
-  NEONMAP1(vcaddq_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
+  NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
+  NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
+  NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
+  NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
+  NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
   NEONMAP1(vcage_v, aarch64_neon_facge, 0),
   NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
   NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
@@ -5997,14 +6009,26 @@
   NEONMAP0(vcltzq_v),
   NEONMAP1(vclz_v, ctlz, Add1ArgType),
   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
-  NEONMAP1(vcmla_rot180_v, aarch64_neon_vcmla_rot180, Add1ArgType),
-  NEONMAP1(vcmla_rot270_v, aarch64_neon_vcmla_rot270, Add1ArgType),
-  NEONMAP1(vcmla_rot90_v, aarch64_neon_vcmla_rot90, Add1ArgType),
-  NEONMAP1(vcmla_v, aarch64_neon_vcmla_rot0, Add1ArgType),
-  NEONMAP1(vcmlaq_rot180_v, aarch64_neon_vcmla_rot180, Add1ArgType),
-  NEONMAP1(vcmlaq_rot270_v, aarch64_neon_vcmla_rot270, Add1ArgType),
-  NEONMAP1(vcmlaq_rot90_v, aarch64_neon_vcmla_rot90, Add1ArgType),
-  NEONMAP1(vcmlaq_v, aarch64_neon_vcmla_rot0, Add1ArgType),
+  NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
+  NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
+  NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
+  NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
+  NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
+  NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
+  NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
+  NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
+  NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
+  NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
+  NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
+  NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
+  NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
+  NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
+  NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
+  NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
+  NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
+  NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
+  NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
+  NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
   NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
@@ -6442,10 +6466,6 @@
   { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
   { NEON::BI__builtin_neon_vbsl_f16, NEON::BI__builtin_neon_vbsl_v, },
   { NEON::BI__builtin_neon_vbslq_f16, NEON::BI__builtin_neon_vbslq_v, },
-  { NEON::BI__builtin_neon_vcadd_rot270_f16, NEON::BI__builtin_neon_vcadd_rot270_v, },
-  { NEON::BI__builtin_neon_vcadd_rot90_f16, NEON::BI__builtin_neon_vcadd_rot90_v, },
-  { NEON::BI__builtin_neon_vcaddq_rot270_f16, NEON::BI__builtin_neon_vcaddq_rot270_v, },
-  { NEON::BI__builtin_neon_vcaddq_rot90_f16, NEON::BI__builtin_neon_vcaddq_rot90_v, },
   { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
   { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
   { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
@@ -6464,14 +6484,6 @@
   { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
   { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
   { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
-  { NEON::BI__builtin_neon_vcmla_f16, NEON::BI__builtin_neon_vcmla_v, },
-  { NEON::BI__builtin_neon_vcmla_rot180_f16, NEON::BI__builtin_neon_vcmla_rot180_v, },
-  { NEON::BI__builtin_neon_vcmla_rot270_f16, NEON::BI__builtin_neon_vcmla_rot270_v, },
-  { NEON::BI__builtin_neon_vcmla_rot90_f16, NEON::BI__builtin_neon_vcmla_rot90_v, },
-  { NEON::BI__builtin_neon_vcmlaq_f16, NEON::BI__builtin_neon_vcmlaq_v, },
-  { NEON::BI__builtin_neon_vcmlaq_rot180_f16, NEON::BI__builtin_neon_vcmlaq_rot180_v, },
-  { NEON::BI__builtin_neon_vcmlaq_rot270_f16, NEON::BI__builtin_neon_vcmlaq_rot270_v, },
-  { NEON::BI__builtin_neon_vcmlaq_rot90_f16, NEON::BI__builtin_neon_vcmlaq_rot90_v, },
   { NEON::BI__builtin_neon_vext_f16, NEON::BI__builtin_neon_vext_v, },
   { NEON::BI__builtin_neon_vextq_f16, NEON::BI__builtin_neon_vextq_v, },
   { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
Index: clang/test/Sema/aarch64-neon-target.c
===================================================================
--- clang/test/Sema/aarch64-neon-target.c
+++ clang/test/Sema/aarch64-neon-target.c
@@ -48,12 +48,19 @@
   vqrdmlahh_s16(1, 1, 1);
 }
 
+__attribute__((target("arch=armv8.3-a+fp16")))
+void test_v83(float32x4_t v4f32, float16x4_t v4f16, float64x2_t v2f64) {
+  vcaddq_rot90_f32(v4f32, v4f32);
+  vcmla_rot90_f16(v4f16, v4f16, v4f16);
+  vcmlaq_rot270_laneq_f64(v2f64, v2f64, v2f64, 1);
+}
+
 __attribute__((target("arch=armv8.5-a")))
 void test_v85(float32x4_t v4f32) {
   vrnd32xq_f32(v4f32);
 }
 
-void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t v16i8, uint8x8_t v8i8, float32x2_t v2f32, float32x4_t v4f32, float16x4_t v4f16, bfloat16x4_t v4bf16, __bf16 bf16) {
+void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t v16i8, uint8x8_t v8i8, float32x2_t v2f32, float32x4_t v4f32, float16x4_t v4f16, float64x2_t v2f64, bfloat16x4_t v4bf16, __bf16 bf16) {
   // dotprod
   vdot_u32(v2i32, v8i8, v8i8); // expected-error {{always_inline function 'vdot_u32' requires target feature 'dotprod'}}
   vdot_laneq_u32(v2i32, v8i8, v16i8, 1); // expected-error {{always_inline function 'vdot_u32' requires target feature 'dotprod'}}
@@ -79,6 +86,10 @@
   vqrdmlahq_s32(v4i32, v4i32, v4i32); // expected-error {{always_inline function 'vqrdmlahq_s32' requires target feature 'v8.1a'}}
   vqrdmlah_laneq_s32(v2i32, v2i32, v4i32, 1); // expected-error {{always_inline function 'vqrdmlah_s32' requires target feature 'v8.1a'}}
   vqrdmlahh_s16(1, 1, 1); // expected-error {{always_inline function 'vqrdmlahh_s16' requires target feature 'v8.1a'}}
+  // 8.3 - complex
+  vcaddq_rot90_f32(v4f32, v4f32); // expected-error {{always_inline function 'vcaddq_rot90_f32' requires target feature 'v8.3a'}}
+  vcmla_rot90_f16(v4f16, v4f16, v4f16); // expected-error {{always_inline function 'vcmla_rot90_f16' requires target feature 'v8.3a'}}
+  vcmlaq_rot270_laneq_f64(v2f64, v2f64, v2f64, 1); // expected-error {{always_inline function 'vcmlaq_rot270_f64' requires target feature 'v8.3a'}}
   // 8.5 - frint
   vrnd32xq_f32(v4f32); // expected-error {{always_inline function 'vrnd32xq_f32' requires target feature 'v8.5a'}}
 }
Index: clang/test/Sema/arm-neon-target.c
===================================================================
--- clang/test/Sema/arm-neon-target.c
+++ clang/test/Sema/arm-neon-target.c
@@ -38,6 +38,12 @@
   vqrdmlahq_s32(v, v, v);
 }
 
+__attribute__((target("v8.3a,fullfp16")))
+void test_v83(float32x4_t v4f32, float16x4_t v4f16) {
+  vcaddq_rot90_f32(v4f32, v4f32);
+  vcmla_rot90_f16(v4f16, v4f16, v4f16);
+}
+
 void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t v16i8, uint8x8_t v8i8, float32x2_t v2f32, float32x4_t v4f32, float16x4_t v4f16, bfloat16x4_t v4bf16, __bf16 bf16) {
   // dotprod
   vdot_u32(v2i32, v8i8, v8i8); // expected-error {{always_inline function 'vdot_u32' requires target feature 'dotprod'}}
@@ -57,4 +63,7 @@
   vcvt_bf16_f32(v4f32); // expected-error {{always_inline function 'vcvt_bf16_f32' requires target feature 'bf16'}}
   // v8.1 - qrdmla
   vqrdmlahq_s32(v4i32, v4i32, v4i32); // expected-error {{always_inline function 'vqrdmlahq_s32' requires target feature 'v8.1a'}}
+  // 8.3 - complex
+  vcaddq_rot90_f32(v4f32, v4f32); // expected-error {{always_inline function 'vcaddq_rot90_f32' requires target feature 'v8.3a'}}
+  vcmla_rot90_f16(v4f16, v4f16, v4f16); // expected-error {{always_inline function 'vcmla_rot90_f16' requires target feature 'v8.3a'}}
 }