diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -323,7 +323,7 @@
 def VQDMULH : SInst<"vqdmulh", "...", "siQsQi">;
 def VQRDMULH : SInst<"vqrdmulh", "...", "siQsQi">;
 
-let ArchGuard = "defined(__ARM_FEATURE_QRDMX)" in {
+let TargetGuard = "v8.1a" in {
 def VQRDMLAH : SInst<"vqrdmlah", "....", "siQsQi">;
 def VQRDMLSH : SInst<"vqrdmlsh", "....", "siQsQi">;
 }
@@ -614,7 +614,7 @@
 def A64_VQRDMULH_LANE : SInst<"vqrdmulh_lane", "..(!q)I", "siQsQi">;
 }
 
-let ArchGuard = "defined(__ARM_FEATURE_QRDMX)" in {
+let TargetGuard = "v8.1a" in {
 def VQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "...qI", "siQsQi", OP_QRDMLAH_LN>;
 def VQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "...qI", "siQsQi", OP_QRDMLSH_LN>;
 }
@@ -1089,14 +1089,14 @@
 def VQDMULH_LANEQ : SInst<"vqdmulh_laneq", "..QI", "siQsQi">;
 def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi">;
 }
-let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in {
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a" in {
 def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN> {
   let isLaneQ = 1;
 }
 def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "...QI", "siQsQi", OP_QRDMLSH_LN> {
   let isLaneQ = 1;
 }
-}
+} // ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a"
 
 // Note: d type implemented by SCALAR_VMULX_LANE
 def VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "fQfQd", OP_MULX_LN>;
@@ -1394,7 +1394,7 @@
 // Scalar Integer Saturating Rounding Doubling Multiply Half High
 def SCALAR_SQRDMULH : SInst<"vqrdmulh", "111", "SsSi">;
 
-let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in {
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a" in {
 ////////////////////////////////////////////////////////////////////////////////
 // Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
 def SCALAR_SQRDMLAH : SInst<"vqrdmlah", "1111", "SsSi">;
@@ -1402,7 +1402,7 @@
 ////////////////////////////////////////////////////////////////////////////////
 // Signed Saturating Rounding Doubling Multiply Subtract Returning High Half
 def SCALAR_SQRDMLSH : SInst<"vqrdmlsh", "1111", "SsSi">;
-}
+} // ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a"
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Floating-point Multiply Extended
@@ -1625,7 +1625,7 @@
   let isLaneQ = 1;
 }
 
-let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in {
+let TargetGuard = "v8.1a" in {
 // Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
 def SCALAR_SQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "111.I", "SsSi", OP_SCALAR_QRDMLAH_LN>;
 def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLAH_LN> {
@@ -1637,7 +1637,7 @@
 def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLSH_LN> {
   let isLaneQ = 1;
 }
-}
+} // TargetGuard = "v8.1a"
 
 def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
 def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs"> {
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -431,6 +431,19 @@
   if (CPUArch != llvm::ARM::ArchKind::INVALID) {
     ArchFeature = ("+" + llvm::ARM::getArchName(CPUArch)).str();
     TargetFeatures.push_back(ArchFeature);
+
+    // These features are added to allow arm_neon.h target(..) attributes to
+    // match with both arm and aarch64. We need to add all previous architecture
+    // versions, so that "8.6" also allows "8.1" functions. In case of v9.x the
+    // v8.x counterparts are added too. We only need these for anything > 8.0-A.
+    for (llvm::ARM::ArchKind I = llvm::ARM::convertV9toV8(CPUArch);
+         I != llvm::ARM::ArchKind::INVALID; --I)
+      Features[llvm::ARM::getSubArch(I)] = true;
+    if (CPUArch > llvm::ARM::ArchKind::ARMV8A &&
+        CPUArch <= llvm::ARM::ArchKind::ARMV9_3A)
+      for (llvm::ARM::ArchKind I = CPUArch; I != llvm::ARM::ArchKind::INVALID;
+           --I)
+        Features[llvm::ARM::getSubArch(I)] = true;
   }
 
   // get default FPU features
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5858,10 +5858,14 @@
   NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
   NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
   NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
-  NEONMAP1(vqrdmlah_v, arm_neon_vqrdmlah, Add1ArgType),
-  NEONMAP1(vqrdmlahq_v, arm_neon_vqrdmlah, Add1ArgType),
-  NEONMAP1(vqrdmlsh_v, arm_neon_vqrdmlsh, Add1ArgType),
-  NEONMAP1(vqrdmlshq_v, arm_neon_vqrdmlsh, Add1ArgType),
+  NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
+  NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
+  NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
+  NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
+  NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
+  NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
+  NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
+  NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
   NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
   NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
   NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
@@ -6108,10 +6112,14 @@
   NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
   NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
   NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
-  NEONMAP1(vqrdmlah_v, aarch64_neon_sqrdmlah, Add1ArgType),
-  NEONMAP1(vqrdmlahq_v, aarch64_neon_sqrdmlah, Add1ArgType),
-  NEONMAP1(vqrdmlsh_v, aarch64_neon_sqrdmlsh, Add1ArgType),
-  NEONMAP1(vqrdmlshq_v, aarch64_neon_sqrdmlsh, Add1ArgType),
+  NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
+  NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
+  NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
+  NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
+  NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
+  NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
+  NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
+  NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
   NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
   NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
   NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
diff --git a/clang/test/CodeGen/arm-target-features.c b/clang/test/CodeGen/arm-target-features.c
--- a/clang/test/CodeGen/arm-target-features.c
+++ b/clang/test/CodeGen/arm-target-features.c
@@ -30,7 +30,7 @@
 
 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu exynos-m4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V82
 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu exynos-m5 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V82
-// CHECK-BASIC-V82: "target-features"="+aes,+armv8.2-a,+crc,+d32,+dotprod,+dsp,+fp-armv8,+fp-armv8d16,+fp-armv8d16sp,+fp-armv8sp,+fp16,+fp64,+fullfp16,+hwdiv,+hwdiv-arm,+neon,+ras,+sha2,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp"
+// CHECK-BASIC-V82: "target-features"="+aes,+armv8.2-a,+crc,+d32,+dotprod,+dsp,+fp-armv8,+fp-armv8d16,+fp-armv8d16sp,+fp-armv8sp,+fp16,+fp64,+fullfp16,+hwdiv,+hwdiv-arm,+neon,+ras,+sha2,+thumb-mode,+v8.1a,+v8.2a,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp"
 
 // RUN: %clang_cc1 -triple armv8-linux-gnueabi -target-cpu cortex-a53 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8-ARM
 // CHECK-BASIC-V8-ARM: "target-features"="+aes,+armv8-a,+crc,+d32,+dsp,+fp-armv8,+fp-armv8d16,+fp-armv8d16sp,+fp-armv8sp,+fp16,+fp64,+hwdiv,+hwdiv-arm,+neon,+sha2,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp,-thumb-mode"
@@ -113,4 +113,7 @@
 // RUN: %clang_cc1 -triple thumb-linux-gnueabi -target-cpu cortex-m85 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-ARMV81M-CORTEX-M85-LINUX
 // CHECK-ARMV81M-CORTEX-M85-LINUX: "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+pacbti,+ras,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp"
 
+// RUN: %clang_cc1 -triple thumbv9.3a-linux-gnueabihf -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-ARCH93
+// CHECK-ARCH93: "target-features"="+armv9.3-a,+thumb-mode,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8.7a,+v8.8a,+v9.1a,+v9.2a,+v9.3a,+v9a"
+
 void foo() {}
diff --git a/clang/test/Sema/aarch64-neon-target.c b/clang/test/Sema/aarch64-neon-target.c
--- a/clang/test/Sema/aarch64-neon-target.c
+++ b/clang/test/Sema/aarch64-neon-target.c
@@ -41,6 +41,13 @@
   vcvt_bf16_f32(v4f32);
 }
 
+__attribute__((target("arch=armv8.1-a")))
+void test_v81(int32x2_t d, int32x4_t v, int s) {
+  vqrdmlahq_s32(v, v, v);
+  vqrdmlah_laneq_s32(d, d, v, 1);
+  vqrdmlahh_s16(1, 1, 1);
+}
+
 __attribute__((target("arch=armv8.5-a")))
 void test_v85(float32x4_t v4f32) {
   vrnd32xq_f32(v4f32);
@@ -68,6 +75,10 @@
   vld1_bf16(0); // expected-error {{'__builtin_neon_vld1_bf16' needs target feature bf16}}
   vcvt_f32_bf16(v4bf16); // expected-error {{always_inline function 'vcvt_f32_bf16' requires target feature 'bf16'}}
   vcvt_bf16_f32(v4f32); // expected-error {{always_inline function 'vcvt_bf16_f32' requires target feature 'bf16'}}
+  // v8.1 - qrdmla
+  vqrdmlahq_s32(v4i32, v4i32, v4i32); // expected-error {{always_inline function 'vqrdmlahq_s32' requires target feature 'v8.1a'}}
+  vqrdmlah_laneq_s32(v2i32, v2i32, v4i32, 1); // expected-error {{always_inline function 'vqrdmlah_s32' requires target feature 'v8.1a'}}
+  vqrdmlahh_s16(1, 1, 1); // expected-error {{always_inline function 'vqrdmlahh_s16' requires target feature 'v8.1a'}}
   // 8.5 - frint
   vrnd32xq_f32(v4f32); // expected-error {{always_inline function 'vrnd32xq_f32' requires target feature 'v8.5a'}}
 }
diff --git a/clang/test/Sema/arm-neon-target.c b/clang/test/Sema/arm-neon-target.c
--- a/clang/test/Sema/arm-neon-target.c
+++ b/clang/test/Sema/arm-neon-target.c
@@ -33,6 +33,11 @@
   vcvt_bf16_f32(v4f32);
 }
 
+__attribute__((target("v8.1a")))
+void test_v81(int32x2_t d, int32x4_t v, int s) {
+  vqrdmlahq_s32(v, v, v);
+}
+
 void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t v16i8, uint8x8_t v8i8, float32x2_t v2f32, float32x4_t v4f32, float16x4_t v4f16, bfloat16x4_t v4bf16, __bf16 bf16) {
   // dotprod
   vdot_u32(v2i32, v8i8, v8i8); // expected-error {{always_inline function 'vdot_u32' requires target feature 'dotprod'}}
@@ -50,4 +55,6 @@
   vld1_bf16(0); // expected-error {{'__builtin_neon_vld1_bf16' needs target feature bf16}}
   vcvt_f32_bf16(v4bf16); // expected-error {{always_inline function 'vcvt_f32_bf16' requires target feature 'bf16'}}
   vcvt_bf16_f32(v4f32); // expected-error {{always_inline function 'vcvt_bf16_f32' requires target feature 'bf16'}}
+  // v8.1 - qrdmla
+  vqrdmlahq_s32(v4i32, v4i32, v4i32); // expected-error {{always_inline function 'vqrdmlahq_s32' requires target feature 'v8.1a'}}
 }
diff --git a/llvm/include/llvm/Support/ARMTargetParser.h b/llvm/include/llvm/Support/ARMTargetParser.h
--- a/llvm/include/llvm/Support/ARMTargetParser.h
+++ b/llvm/include/llvm/Support/ARMTargetParser.h
@@ -237,6 +237,23 @@
 #include "llvm/Support/ARMTargetParser.def"
 };
 
+// Step Kind back to the enumerator that precedes it in ArchKind.
+// ARMV8A, ARMV8_1A, ARMV9A, ARMV8R and INVALID are mapped to INVALID instead,
+// which is the value the feature-collection loops use as their terminator.
+inline ArchKind &operator--(ArchKind &Kind) {
+  assert((Kind >= ArchKind::ARMV8A && Kind <= ArchKind::ARMV9_3A) &&
+         "We only expect operator-- to be called with ARMV8/V9");
+  if (Kind == ArchKind::INVALID || Kind == ArchKind::ARMV8A ||
+      Kind == ArchKind::ARMV8_1A || Kind == ArchKind::ARMV9A ||
+      Kind == ArchKind::ARMV8R)
+    Kind = ArchKind::INVALID;
+  else {
+    unsigned KindAsInteger = static_cast<unsigned>(Kind);
+    Kind = static_cast<ArchKind>(--KindAsInteger);
+  }
+  return Kind;
+}
+
 // Information by ID
 StringRef getFPUName(unsigned FPUKind);
 FPUVersion getFPUVersion(unsigned FPUKind);
@@ -258,6 +275,9 @@
 bool appendArchExtFeatures(StringRef CPU, ARM::ArchKind AK, StringRef ArchExt,
                            std::vector<StringRef> &Features,
                            unsigned &ArgFPUKind);
+// Map an Armv9-A ArchKind to its Armv8-A counterpart (ARMV9A -> ARMV8_5A);
+// returns ArchKind::INVALID for any other input.
+ArchKind convertV9toV8(ArchKind AK);
 
 // Information by Name
 unsigned getDefaultFPU(StringRef CPU, ArchKind AK);
diff --git a/llvm/lib/Support/ARMTargetParser.cpp b/llvm/lib/Support/ARMTargetParser.cpp
--- a/llvm/lib/Support/ARMTargetParser.cpp
+++ b/llvm/lib/Support/ARMTargetParser.cpp
@@ -88,54 +88,60 @@
   llvm_unreachable("Unhandled architecture");
 }
 
+// Return the architecture profile (A, R or M) of AK, or ProfileKind::INVALID
+// for every other ArchKind.
+static ARM::ProfileKind getProfileKind(ARM::ArchKind AK) {
+  switch (AK) {
+  case ARM::ArchKind::ARMV6M:
+  case ARM::ArchKind::ARMV7M:
+  case ARM::ArchKind::ARMV7EM:
+  case ARM::ArchKind::ARMV8MMainline:
+  case ARM::ArchKind::ARMV8MBaseline:
+  case ARM::ArchKind::ARMV8_1MMainline:
+    return ARM::ProfileKind::M;
+  case ARM::ArchKind::ARMV7R:
+  case ARM::ArchKind::ARMV8R:
+    return ARM::ProfileKind::R;
+  case ARM::ArchKind::ARMV7A:
+  case ARM::ArchKind::ARMV7VE:
+  case ARM::ArchKind::ARMV7K:
+  case ARM::ArchKind::ARMV8A:
+  case ARM::ArchKind::ARMV8_1A:
+  case ARM::ArchKind::ARMV8_2A:
+  case ARM::ArchKind::ARMV8_3A:
+  case ARM::ArchKind::ARMV8_4A:
+  case ARM::ArchKind::ARMV8_5A:
+  case ARM::ArchKind::ARMV8_6A:
+  case ARM::ArchKind::ARMV8_7A:
+  case ARM::ArchKind::ARMV8_8A:
+  case ARM::ArchKind::ARMV9A:
+  case ARM::ArchKind::ARMV9_1A:
+  case ARM::ArchKind::ARMV9_2A:
+  case ARM::ArchKind::ARMV9_3A:
+    return ARM::ProfileKind::A;
+  case ARM::ArchKind::ARMV4:
+  case ARM::ArchKind::ARMV4T:
+  case ARM::ArchKind::ARMV5T:
+  case ARM::ArchKind::ARMV5TE:
+  case ARM::ArchKind::ARMV5TEJ:
+  case ARM::ArchKind::ARMV6:
+  case ARM::ArchKind::ARMV6K:
+  case ARM::ArchKind::ARMV6T2:
+  case ARM::ArchKind::ARMV6KZ:
+  case ARM::ArchKind::ARMV7S:
+  case ARM::ArchKind::IWMMXT:
+  case ARM::ArchKind::IWMMXT2:
+  case ARM::ArchKind::XSCALE:
+  case ARM::ArchKind::INVALID:
+    return ARM::ProfileKind::INVALID;
+  }
+  llvm_unreachable("Unhandled architecture");
+}
+
 // Profile A/R/M
 ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) {
   Arch = getCanonicalArchName(Arch);
-  switch (parseArch(Arch)) {
-  case ArchKind::ARMV6M:
-  case ArchKind::ARMV7M:
-  case ArchKind::ARMV7EM:
-  case ArchKind::ARMV8MMainline:
-  case ArchKind::ARMV8MBaseline:
-  case ArchKind::ARMV8_1MMainline:
-    return ProfileKind::M;
-  case ArchKind::ARMV7R:
-  case ArchKind::ARMV8R:
-    return ProfileKind::R;
-  case ArchKind::ARMV7A:
-  case ArchKind::ARMV7VE:
-  case ArchKind::ARMV7K:
-  case ArchKind::ARMV8A:
-  case ArchKind::ARMV8_1A:
-  case ArchKind::ARMV8_2A:
-  case ArchKind::ARMV8_3A:
-  case ArchKind::ARMV8_4A:
-  case ArchKind::ARMV8_5A:
-  case ArchKind::ARMV8_6A:
-  case ArchKind::ARMV8_7A:
-  case ArchKind::ARMV8_8A:
-  case ArchKind::ARMV9A:
-  case ArchKind::ARMV9_1A:
-  case ArchKind::ARMV9_2A:
-  case ArchKind::ARMV9_3A:
-    return ProfileKind::A;
-  case ArchKind::ARMV4:
-  case ArchKind::ARMV4T:
-  case ArchKind::ARMV5T:
-  case ArchKind::ARMV5TE:
-  case ArchKind::ARMV5TEJ:
-  case ArchKind::ARMV6:
-  case ArchKind::ARMV6K:
-  case ArchKind::ARMV6T2:
-  case ArchKind::ARMV6KZ:
-  case ArchKind::ARMV7S:
-  case ArchKind::IWMMXT:
-  case ArchKind::IWMMXT2:
-  case ArchKind::XSCALE:
-  case ArchKind::INVALID:
-    return ProfileKind::INVALID;
-  }
-  llvm_unreachable("Unhandled architecture");
+  return getProfileKind(parseArch(Arch));
 }
 
 StringRef ARM::getArchSynonym(StringRef Arch) {
@@ -546,6 +552,19 @@
   return StartingNumFeatures != Features.size();
 }
 
+// Translate a v9.x-A ArchKind into its v8.x-A counterpart, with ARMV9A
+// mapping to ARMV8_5A. Relies on both enumerator runs being consecutive.
+ARM::ArchKind ARM::convertV9toV8(ARM::ArchKind AK) {
+  if (getProfileKind(AK) != ProfileKind::A)
+    return ARM::ArchKind::INVALID;
+  if (AK < ARM::ArchKind::ARMV9A || AK > ARM::ArchKind::ARMV9_3A)
+    return ARM::ArchKind::INVALID;
+  unsigned AK_v8 = static_cast<unsigned>(ARM::ArchKind::ARMV8_5A);
+  AK_v8 += static_cast<unsigned>(AK) -
+           static_cast<unsigned>(ARM::ArchKind::ARMV9A);
+  return static_cast<ARM::ArchKind>(AK_v8);
+}
+
 StringRef ARM::getDefaultCPU(StringRef Arch) {
   ArchKind AK = parseArch(Arch);
   if (AK == ArchKind::INVALID)