diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -20347,13 +20347,7 @@ case 'w': if (VT == MVT::Other) break; - if (VT == MVT::f16) - return RCPair(0U, Subtarget->hasFullFP16() ? &ARM::HPRRegClass - : &ARM::SPRRegClass); - if (VT == MVT::bf16) - return RCPair(0U, Subtarget->hasBF16() ? &ARM::HPRRegClass - : &ARM::SPRRegClass); - if (VT == MVT::f32) + if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16) return RCPair(0U, &ARM::SPRRegClass); if (VT.getSizeInBits() == 64) return RCPair(0U, &ARM::DPRRegClass); @@ -20373,13 +20367,7 @@ case 't': if (VT == MVT::Other) break; - if (VT == MVT::f16) - return RCPair(0U, Subtarget->hasFullFP16() ? &ARM::HPRRegClass - : &ARM::SPRRegClass); - if (VT == MVT::bf16) - return RCPair(0U, Subtarget->hasBF16() ? &ARM::HPRRegClass - : &ARM::SPRRegClass); - if (VT == MVT::f32 || VT == MVT::i32) + if (VT == MVT::f32 || VT == MVT::i32 || VT == MVT::f16 || VT == MVT::bf16) return RCPair(0U, &ARM::SPRRegClass); if (VT.getSizeInBits() == 64) return RCPair(0U, &ARM::DPR_VFP2RegClass); diff --git a/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll b/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll --- a/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll +++ b/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll @@ -1,21 +1,34 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + ; No FP16/BF16 -; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-SOFTFP -; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-SOFTFP -; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-HARD -; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-HARD +; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=NO-FP16-SOFTFP +; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=NO-FP16-SOFTFP +; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=NO-FP16-SOFTFP +; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=NO-FP16-SOFTFP +; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=NO-FP16-HARD +; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=NO-FP16-HARD +; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=NO-FP16-HARD +; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=NO-FP16-HARD ; With FP16, Without BF16 -; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP -; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP -; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD -; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD +; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=FP16-SOFTFP +; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=FP16-SOFTFP +; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=FP16-SOFTFP +; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=FP16-SOFTFP +; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=FP16-HARD +; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=FP16-HARD +; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=FP16-HARD +; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=FP16-HARD ; With FP16/BF16 -; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP,BF16-SOFTFP -; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP,BF16-SOFTFP -; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD,BF16-HARD -; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD,BF16-HARD +; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefix=BF16-SOFTFP +; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefix=BF16-SOFTFP +; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,+neon %s -o - | FileCheck %s --check-prefix=SIMD-BF16-SOFTFP +; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,+neon %s -o - | FileCheck %s --check-prefix=SIMD-BF16-SOFTFP +; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefix=BF16-HARD +; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefix=BF16-HARD +; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,+neon %s -o - | FileCheck %s --check-prefix=SIMD-BF16-HARD +; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,+neon %s -o - | FileCheck %s --check-prefix=SIMD-BF16-HARD ; This test ensures that we can use `w` and `t` constraints to allocate ; S-registers for 16-bit FP inputs and outputs for inline assembly, with either @@ -41,6 +54,8 @@ ; FP16-SOFTFP-LABEL: half_t: ; FP16-SOFTFP: @ %bb.0: @ %entry ; FP16-SOFTFP-NEXT: vmov.f16 s0, r0 +; FP16-SOFTFP-NEXT: vmov.f16 r0, s0 +; FP16-SOFTFP-NEXT: vmov s0, r0 ; FP16-SOFTFP-NEXT: @APP ; FP16-SOFTFP-NEXT: vmov.f32 s0, s0 ; FP16-SOFTFP-NEXT: @NO_APP @@ -49,10 +64,52 @@ ; ; FP16-HARD-LABEL: half_t: ; FP16-HARD: @ %bb.0: @ %entry +; FP16-HARD-NEXT: vmov.f16 r0, s0 +; FP16-HARD-NEXT: vmov s0, r0 ; FP16-HARD-NEXT: @APP ; FP16-HARD-NEXT: vmov.f32 s0, s0 ; FP16-HARD-NEXT: @NO_APP ; FP16-HARD-NEXT: bx lr +; +; BF16-SOFTFP-LABEL: half_t: +; BF16-SOFTFP: @ %bb.0: @ %entry +; BF16-SOFTFP-NEXT: vmov.f16 s0, r0 +; BF16-SOFTFP-NEXT: vmov.f16 r0, s0 +; BF16-SOFTFP-NEXT: vmov s0, r0 +; BF16-SOFTFP-NEXT: @APP +; BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; BF16-SOFTFP-NEXT: @NO_APP +; BF16-SOFTFP-NEXT: vmov r0, s0 +; BF16-SOFTFP-NEXT: bx lr +; +; SIMD-BF16-SOFTFP-LABEL: half_t: +; SIMD-BF16-SOFTFP: @ %bb.0: @ %entry +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 s0, r0 +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-SOFTFP-NEXT: vmov s0, r0 +; SIMD-BF16-SOFTFP-NEXT: @APP +; SIMD-BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-SOFTFP-NEXT: @NO_APP +; SIMD-BF16-SOFTFP-NEXT: vmov r0, s0 +; SIMD-BF16-SOFTFP-NEXT: bx lr +; +; BF16-HARD-LABEL: half_t: +; BF16-HARD: @ %bb.0: @ %entry +; BF16-HARD-NEXT: vmov.f16 r0, s0 +; BF16-HARD-NEXT: vmov s0, r0 +; BF16-HARD-NEXT: @APP +; BF16-HARD-NEXT: vmov.f32 s0, s0 +; BF16-HARD-NEXT: @NO_APP +; BF16-HARD-NEXT: bx lr +; +; SIMD-BF16-HARD-LABEL: half_t: +; SIMD-BF16-HARD: @ %bb.0: @ %entry +; SIMD-BF16-HARD-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-HARD-NEXT: vmov s0, r0 +; SIMD-BF16-HARD-NEXT: @APP +; SIMD-BF16-HARD-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-HARD-NEXT: @NO_APP +; SIMD-BF16-HARD-NEXT: bx lr entry: %0 = tail call half asm "vmov $0, $1", "=t,t"(half %x) ret half %0 @@ -78,6 +135,8 @@ ; FP16-SOFTFP-LABEL: half_w: ; FP16-SOFTFP: @ %bb.0: @ %entry ; FP16-SOFTFP-NEXT: vmov.f16 s0, r0 +; FP16-SOFTFP-NEXT: vmov.f16 r0, s0 +; FP16-SOFTFP-NEXT: vmov s0, r0 ; FP16-SOFTFP-NEXT: @APP ; FP16-SOFTFP-NEXT: vmov.f32 s0, s0 ; FP16-SOFTFP-NEXT: @NO_APP @@ -86,10 +145,52 @@ ; ; FP16-HARD-LABEL: half_w: ; FP16-HARD: @ %bb.0: @ %entry +; FP16-HARD-NEXT: vmov.f16 r0, s0 +; FP16-HARD-NEXT: vmov s0, r0 ; FP16-HARD-NEXT: @APP ; FP16-HARD-NEXT: vmov.f32 s0, s0 ; FP16-HARD-NEXT: @NO_APP ; FP16-HARD-NEXT: bx lr +; +; BF16-SOFTFP-LABEL: half_w: +; BF16-SOFTFP: @ %bb.0: @ %entry +; BF16-SOFTFP-NEXT: vmov.f16 s0, r0 +; BF16-SOFTFP-NEXT: vmov.f16 r0, s0 +; BF16-SOFTFP-NEXT: vmov s0, r0 +; BF16-SOFTFP-NEXT: @APP +; BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; BF16-SOFTFP-NEXT: @NO_APP +; BF16-SOFTFP-NEXT: vmov r0, s0 +; BF16-SOFTFP-NEXT: bx lr +; +; SIMD-BF16-SOFTFP-LABEL: half_w: +; SIMD-BF16-SOFTFP: @ %bb.0: @ %entry +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 s0, r0 +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-SOFTFP-NEXT: vmov s0, r0 +; SIMD-BF16-SOFTFP-NEXT: @APP +; SIMD-BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-SOFTFP-NEXT: @NO_APP +; SIMD-BF16-SOFTFP-NEXT: vmov r0, s0 +; SIMD-BF16-SOFTFP-NEXT: bx lr +; +; BF16-HARD-LABEL: half_w: +; BF16-HARD: @ %bb.0: @ %entry +; BF16-HARD-NEXT: vmov.f16 r0, s0 +; BF16-HARD-NEXT: vmov s0, r0 +; BF16-HARD-NEXT: @APP +; BF16-HARD-NEXT: vmov.f32 s0, s0 +; BF16-HARD-NEXT: @NO_APP +; BF16-HARD-NEXT: bx lr +; +; SIMD-BF16-HARD-LABEL: half_w: +; SIMD-BF16-HARD: @ %bb.0: @ %entry +; SIMD-BF16-HARD-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-HARD-NEXT: vmov s0, r0 +; SIMD-BF16-HARD-NEXT: @APP +; SIMD-BF16-HARD-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-HARD-NEXT: @NO_APP +; SIMD-BF16-HARD-NEXT: bx lr entry: %0 = tail call half asm "vmov $0, $1", "=w,w"(half %x) ret half %0 @@ -127,6 +228,42 @@ ; FP16-HARD-NEXT: vmov.f32 s0, s0 ; FP16-HARD-NEXT: @NO_APP ; FP16-HARD-NEXT: bx lr +; +; BF16-SOFTFP-LABEL: bf16_t: +; BF16-SOFTFP: @ %bb.0: @ %entry +; BF16-SOFTFP-NEXT: vmov s0, r0 +; BF16-SOFTFP-NEXT: @APP +; BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; BF16-SOFTFP-NEXT: @NO_APP +; BF16-SOFTFP-NEXT: vmov r0, s0 +; BF16-SOFTFP-NEXT: bx lr +; +; SIMD-BF16-SOFTFP-LABEL: bf16_t: +; SIMD-BF16-SOFTFP: @ %bb.0: @ %entry +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 s0, r0 +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-SOFTFP-NEXT: vmov s0, r0 +; SIMD-BF16-SOFTFP-NEXT: @APP +; SIMD-BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-SOFTFP-NEXT: @NO_APP +; SIMD-BF16-SOFTFP-NEXT: vmov r0, s0 +; SIMD-BF16-SOFTFP-NEXT: bx lr +; +; BF16-HARD-LABEL: bf16_t: +; BF16-HARD: @ %bb.0: @ %entry +; BF16-HARD-NEXT: @APP +; BF16-HARD-NEXT: vmov.f32 s0, s0 +; BF16-HARD-NEXT: @NO_APP +; BF16-HARD-NEXT: bx lr +; +; SIMD-BF16-HARD-LABEL: bf16_t: +; SIMD-BF16-HARD: @ %bb.0: @ %entry +; SIMD-BF16-HARD-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-HARD-NEXT: vmov s0, r0 +; SIMD-BF16-HARD-NEXT: @APP +; SIMD-BF16-HARD-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-HARD-NEXT: @NO_APP +; SIMD-BF16-HARD-NEXT: bx lr entry: %0 = tail call bfloat asm "vmov $0, $1", "=t,t"(bfloat %x) ret bfloat %0 @@ -164,10 +301,43 @@ ; FP16-HARD-NEXT: vmov.f32 s0, s0 ; FP16-HARD-NEXT: @NO_APP ; FP16-HARD-NEXT: bx lr +; +; BF16-SOFTFP-LABEL: bf16_w: +; BF16-SOFTFP: @ %bb.0: @ %entry +; BF16-SOFTFP-NEXT: vmov s0, r0 +; BF16-SOFTFP-NEXT: @APP +; BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; BF16-SOFTFP-NEXT: @NO_APP +; BF16-SOFTFP-NEXT: vmov r0, s0 +; BF16-SOFTFP-NEXT: bx lr +; +; SIMD-BF16-SOFTFP-LABEL: bf16_w: +; SIMD-BF16-SOFTFP: @ %bb.0: @ %entry +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 s0, r0 +; SIMD-BF16-SOFTFP-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-SOFTFP-NEXT: vmov s0, r0 +; SIMD-BF16-SOFTFP-NEXT: @APP +; SIMD-BF16-SOFTFP-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-SOFTFP-NEXT: @NO_APP +; SIMD-BF16-SOFTFP-NEXT: vmov r0, s0 +; SIMD-BF16-SOFTFP-NEXT: bx lr +; +; BF16-HARD-LABEL: bf16_w: +; BF16-HARD: @ %bb.0: @ %entry +; BF16-HARD-NEXT: @APP +; BF16-HARD-NEXT: vmov.f32 s0, s0 +; BF16-HARD-NEXT: @NO_APP +; BF16-HARD-NEXT: bx lr +; +; SIMD-BF16-HARD-LABEL: bf16_w: +; SIMD-BF16-HARD: @ %bb.0: @ %entry +; SIMD-BF16-HARD-NEXT: vmov.f16 r0, s0 +; SIMD-BF16-HARD-NEXT: vmov s0, r0 +; SIMD-BF16-HARD-NEXT: @APP +; SIMD-BF16-HARD-NEXT: vmov.f32 s0, s0 +; SIMD-BF16-HARD-NEXT: @NO_APP +; SIMD-BF16-HARD-NEXT: bx lr entry: %0 = tail call bfloat asm "vmov $0, $1", "=w,w"(bfloat %x) ret bfloat %0 } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; BF16-HARD: {{.*}} -; BF16-SOFTFP: {{.*}}