diff --git a/0001-ARM-Support-fp16-bf16-using-t-constraint.patch b/0001-ARM-Support-fp16-bf16-using-t-constraint.patch new file mode 100644 --- /dev/null +++ b/0001-ARM-Support-fp16-bf16-using-t-constraint.patch @@ -0,0 +1,73 @@ +From adbb3fc71a62e2e6c733fe0ff3e1c14427dc6dc6 Mon Sep 17 00:00:00 2001 +From: Archibald Elliott +Date: Fri, 23 Sep 2022 18:38:33 +0100 +Subject: [PATCH] [ARM] Support fp16/bf16 using t constraint + +fp16 and bf16 values can be used in GCC's inline assembly using the "t" +constraint, which means "VFP floating-point registers s0-s31" - fp16 and +bf16 values are stored in S registers too. + +This change ensures that LLVM is compatible with GCC for programs that +use fp16 and the 't' constraint. + +Fixes #57753 +--- + llvm/lib/Target/ARM/ARMISelLowering.cpp | 2 ++ + llvm/test/CodeGen/ARM/inlineasm-fp-half.ll | 32 ++++++++++++++++++++++ + 2 files changed, 34 insertions(+) + create mode 100644 llvm/test/CodeGen/ARM/inlineasm-fp-half.ll + +diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp +index 0cc97e3b9e0f..74931b150f09 100644 +--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp ++++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp +@@ -20163,6 +20163,8 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint( + case 't': + if (VT == MVT::Other) + break; ++ if (VT == MVT::f16 || VT == MVT::bf16) ++ return RCPair(0U, &ARM::HPRRegClass); + if (VT == MVT::f32 || VT == MVT::i32) + return RCPair(0U, &ARM::SPRRegClass); + if (VT.getSizeInBits() == 64) +diff --git a/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll b/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll +new file mode 100644 +index 000000000000..b2a894f49c52 +--- /dev/null ++++ b/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll +@@ -0,0 +1,32 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -mtriple=arm -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s ++; RUN: llc -mtriple=thumb -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s ++ ++ ++define arm_aapcscc half @f(half %x) nounwind { ++; CHECK-LABEL: f: ++; CHECK: @ %bb.0: @ %entry ++; CHECK-NEXT: vmov.f16 s0, r0 ++; CHECK-NEXT: @APP ++; CHECK-NEXT: vsqrt.f16 s0, s0 ++; CHECK-NEXT: @NO_APP ++; CHECK-NEXT: vmov r0, s0 ++; CHECK-NEXT: bx lr ++entry: ++ %0 = tail call half asm "vsqrt.f16 $0, $1", "=t,t"(half %x) ++ ret half %0 ++} ++ ++define arm_aapcscc bfloat @h(bfloat %x) nounwind { ++; CHECK-LABEL: h: ++; CHECK: @ %bb.0: @ %entry ++; CHECK-NEXT: vmov.f16 s0, r0 ++; CHECK-NEXT: @APP ++; CHECK-NEXT: vmov.f32 s0, s0 ++; CHECK-NEXT: @NO_APP ++; CHECK-NEXT: vmov.f16 r0, s0 ++; CHECK-NEXT: bx lr ++entry: ++ %0 = tail call bfloat asm "vmov.f32 $0, $1", "=t,t"(bfloat %x) ++ ret bfloat %0 ++} +-- +2.25.1 + diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/llvm/include/llvm/Support/AArch64TargetParser.def @@ -76,19 +76,19 @@ (AArch64::AEK_CRC | AArch64::AEK_FP | AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE | AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | - AArch64::AEK_SVE2)) + AArch64::AEK_BF16 | AArch64::AEK_I8MM | AArch64::AEK_SVE2)) AARCH64_ARCH("armv9.2-a", ARMV9_2A, "9.2-A", "v9.2a", ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8, (AArch64::AEK_CRC | AArch64::AEK_FP | AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE | AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | - AArch64::AEK_SVE2)) + AArch64::AEK_BF16 | AArch64::AEK_I8MM | AArch64::AEK_SVE2)) AARCH64_ARCH("armv9.3-a", ARMV9_3A, "9.3-A", "v9.3a", ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8, (AArch64::AEK_CRC | AArch64::AEK_FP | AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE | AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | - AArch64::AEK_SVE2)) + AArch64::AEK_BF16 | AArch64::AEK_I8MM | AArch64::AEK_SVE2)) // For v8-R, we do not enable crypto and align with GCC that enables a more // minimal set of optional architecture extensions. AARCH64_ARCH("armv8-r", ARMV8R, "8-R", "v8r",