Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -971,6 +971,8 @@
       setOperationAction(Op, VT, Custom);
 
   if (Subtarget->hasFullFP16()) {
+    setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
+
     setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom);
     setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
     setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Custom);
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -3730,6 +3730,13 @@
         .addReg(SrcReg, getKillRegState(KillSrc));
     return;
   }
+  // Copy from GPR32 to FPR16.
+  if (AArch64::FPR16RegClass.contains(DestReg) &&
+      AArch64::GPR32RegClass.contains(SrcReg)) {
+    BuildMI(MBB, I, DL, get(AArch64::FMOVWHr), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
 
   if (DestReg == AArch64::NZCV) {
     assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1625,6 +1625,8 @@
 }]>;
 
+def : Pat<(f16 fpimm:$in),
+          (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)), FPR16)>;
 def : Pat<(f32 fpimm:$in),
           (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
 def : Pat<(f64 fpimm:$in),
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -286,6 +286,8 @@
 
 def AArch64fadda_p : PatFrags<(ops node:$op1, node:$op2, node:$op3),
     [(AArch64fadda_p_node node:$op1, node:$op2, node:$op3),
+     (AArch64fadda_p_node (SVEAllActive), node:$op2,
+             (vselect node:$op1, node:$op3, (splat_vector (f16 fpimm_minus0)))),
      (AArch64fadda_p_node (SVEAllActive), node:$op2,
              (vselect node:$op1, node:$op3, (splat_vector (f32 fpimm_minus0)))),
      (AArch64fadda_p_node (SVEAllActive), node:$op2,
@@ -712,6 +714,12 @@
             (DUP_ZI_D $a, $b)>;
 
   // Duplicate immediate FP into all vector elements.
+  def : Pat<(nxv2f16 (splat_vector (f16 fpimm:$val))),
+            (DUP_ZR_H (MOVi32imm (bitcast_fpimm_to_i32 f16:$val)))>;
+  def : Pat<(nxv4f16 (splat_vector (f16 fpimm:$val))),
+            (DUP_ZR_H (MOVi32imm (bitcast_fpimm_to_i32 f16:$val)))>;
+  def : Pat<(nxv8f16 (splat_vector (f16 fpimm:$val))),
+            (DUP_ZR_H (MOVi32imm (bitcast_fpimm_to_i32 f16:$val)))>;
   def : Pat<(nxv2f32 (splat_vector (f32 fpimm:$val))),
             (DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>;
   def : Pat<(nxv4f32 (splat_vector (f32 fpimm:$val))),
Index: llvm/test/CodeGen/AArch64/f16-imm.ll
===================================================================
--- llvm/test/CodeGen/AArch64/f16-imm.ll
+++ llvm/test/CodeGen/AArch64/f16-imm.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+no-zcz-fp | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-NOZCZ
-; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+zcz | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-ZCZ
-; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
+; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+no-zcz-fp | FileCheck %s --check-prefixes=CHECK-FP16,CHECK-NOZCZ
+; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+zcz | FileCheck %s --check-prefixes=CHECK-FP16,CHECK-ZCZ
+; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK-NOFP16
 
 define half @Const0() {
 ; CHECK-NOZCZ-LABEL: Const0:
@@ -84,31 +84,49 @@
 }
 
 define half @Const5() {
-; CHECK-LABEL: Const5:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    adrp x8, .LCPI5_0
-; CHECK-NEXT:    ldr h0, [x8, :lo12:.LCPI5_0]
-; CHECK-NEXT:    ret
+; CHECK-FP16-LABEL: Const5:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    mov w8, #12272
+; CHECK-FP16-NEXT:    fmov h0, w8
+; CHECK-FP16-NEXT:    ret
+;
+; CHECK-NOFP16-LABEL: Const5:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    adrp x8, .LCPI5_0
+; CHECK-NOFP16-NEXT:    ldr h0, [x8, :lo12:.LCPI5_0]
+; CHECK-NOFP16-NEXT:    ret
 entry:
   ret half 0xH2FF0
 }
 
 define half @Const6() {
-; CHECK-LABEL: Const6:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    adrp x8, .LCPI6_0
-; CHECK-NEXT:    ldr h0, [x8, :lo12:.LCPI6_0]
-; CHECK-NEXT:    ret
+; CHECK-FP16-LABEL: Const6:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    mov w8, #20417
+; CHECK-FP16-NEXT:    fmov h0, w8
+; CHECK-FP16-NEXT:    ret
+;
+; CHECK-NOFP16-LABEL: Const6:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    adrp x8, .LCPI6_0
+; CHECK-NOFP16-NEXT:    ldr h0, [x8, :lo12:.LCPI6_0]
+; CHECK-NOFP16-NEXT:    ret
 entry:
   ret half 0xH4FC1
 }
 
 define half @Const7() {
-; CHECK-LABEL: Const7:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    adrp x8, .LCPI7_0
-; CHECK-NEXT:    ldr h0, [x8, :lo12:.LCPI7_0]
-; CHECK-NEXT:    ret
+; CHECK-FP16-LABEL: Const7:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    mov w8, #20480
+; CHECK-FP16-NEXT:    fmov h0, w8
+; CHECK-FP16-NEXT:    ret
+;
+; CHECK-NOFP16-LABEL: Const7:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    adrp x8, .LCPI7_0
+; CHECK-NOFP16-NEXT:    ldr h0, [x8, :lo12:.LCPI7_0]
+; CHECK-NOFP16-NEXT:    ret
 entry:
   ret half 0xH5000
 }
Index: llvm/test/CodeGen/AArch64/isinf.ll
===================================================================
--- llvm/test/CodeGen/AArch64/isinf.ll
+++ llvm/test/CodeGen/AArch64/isinf.ll
@@ -6,13 +6,13 @@
 declare double @llvm.fabs.f64(double)
 declare fp128 @llvm.fabs.f128(fp128)
 
-; INFINITY requires loading the constant for _Float16
+; Check that INFINITY for _Float16 is materialized
 define i32 @replace_isinf_call_f16(half %x) {
 ; CHECK-LABEL: replace_isinf_call_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI0_0
+; CHECK-NEXT:    mov w8, #31744
 ; CHECK-NEXT:    fabs h0, h0
-; CHECK-NEXT:    ldr h1, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT:    fmov h1, w8
 ; CHECK-NEXT:    fcmp h0, h1
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
Index: llvm/test/CodeGen/AArch64/sve-fadda-select.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-fadda-select.ll
+++ llvm/test/CodeGen/AArch64/sve-fadda-select.ll
@@ -45,18 +45,11 @@
   ret double %fadda
 }
 
-; Currently the folding doesn't work for f16 element types, since -0.0 is not treated as a legal f16 immediate.
-
 define half @pred_fadda_nxv2f16(half %x, <vscale x 2 x half> %y, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: pred_fadda_nxv2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI3_0
-; CHECK-NEXT:    add x8, x8, :lo12:.LCPI3_0
-; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
-; CHECK-NEXT:    ld1rh { z2.d }, p1/z, [x8]
-; CHECK-NEXT:    sel z1.d, p0, z1.d, z2.d
-; CHECK-NEXT:    fadda h0, p1, h0, z1.h
+; CHECK-NEXT:    fadda h0, p0, h0, z1.h
 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %i = insertelement <vscale x 2 x half> poison, half -0.000000e+00, i32 0
@@ -69,13 +62,8 @@
 define half @pred_fadda_nxv4f16(half %x, <vscale x 4 x half> %y, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: pred_fadda_nxv4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI4_0
-; CHECK-NEXT:    add x8, x8, :lo12:.LCPI4_0
-; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
-; CHECK-NEXT:    ld1rh { z2.s }, p1/z, [x8]
-; CHECK-NEXT:    sel z1.s, p0, z1.s, z2.s
-; CHECK-NEXT:    fadda h0, p1, h0, z1.h
+; CHECK-NEXT:    fadda h0, p0, h0, z1.h
 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %i = insertelement <vscale x 4 x half> poison, half -0.000000e+00, i32 0
@@ -88,13 +76,8 @@
 define half @pred_fadda_nxv8f16(half %x, <vscale x 8 x half> %y, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: pred_fadda_nxv8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI5_0
-; CHECK-NEXT:    add x8, x8, :lo12:.LCPI5_0
-; CHECK-NEXT:    ptrue p1.h
 ; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
-; CHECK-NEXT:    ld1rh { z2.h }, p1/z, [x8]
-; CHECK-NEXT:    sel z1.h, p0, z1.h, z2.h
-; CHECK-NEXT:    fadda h0, p1, h0, z1.h
+; CHECK-NEXT:    fadda h0, p0, h0, z1.h
 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %i = insertelement <vscale x 8 x half> poison, half -0.000000e+00, i32 0
Index: llvm/test/CodeGen/AArch64/sve-fp-reduce.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-fp-reduce.ll
+++ llvm/test/CodeGen/AArch64/sve-fp-reduce.ll
@@ -47,14 +47,13 @@
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
-; CHECK-NEXT:    adrp x8, .LCPI3_0
-; CHECK-NEXT:    add x8, x8, :lo12:.LCPI3_0
+; CHECK-NEXT:    mov w8, #32768
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    ld1rh { z0.d }, p1/z, [x8]
-; CHECK-NEXT:    st1h { z0.d }, p1, [sp, #3, mul vl]
 ; CHECK-NEXT:    fmov s0, s1
+; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    st1h { z2.d }, p1, [sp, #3, mul vl]
 ; CHECK-NEXT:    ld1h { z2.h }, p0/z, [sp]
 ; CHECK-NEXT:    fadda h0, p0, h0, z2.h
 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
@@ -73,22 +72,21 @@
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-3
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
-; CHECK-NEXT:    adrp x8, .LCPI4_0
-; CHECK-NEXT:    add x8, x8, :lo12:.LCPI4_0
+; CHECK-NEXT:    mov w8, #32768
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    st1h { z1.h }, p0, [sp]
 ; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
-; CHECK-NEXT:    ld1rh { z1.d }, p1/z, [x8]
+; CHECK-NEXT:    mov z3.h, w8
 ; CHECK-NEXT:    addvl x8, sp, #1
+; CHECK-NEXT:    st1h { z3.d }, p1, [sp, #1, mul vl]
 ; CHECK-NEXT:    fadda h2, p0, h2, z0.h
-; CHECK-NEXT:    st1h { z1.d }, p1, [sp, #1, mul vl]
-; CHECK-NEXT:    ld1h { z3.h }, p0/z, [sp]
-; CHECK-NEXT:    st1h { z3.h }, p0, [sp, #1, mul vl]
-; CHECK-NEXT:    st1h { z1.d }, p1, [sp, #6, mul vl]
-; CHECK-NEXT:    ld1h { z3.h }, p0/z, [sp, #1, mul vl]
-; CHECK-NEXT:    st1h { z3.h }, p0, [sp, #2, mul vl]
-; CHECK-NEXT:    st1h { z1.d }, p1, [x8, #7, mul vl]
+; CHECK-NEXT:    ld1h { z1.h }, p0/z, [sp]
+; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    st1h { z3.d }, p1, [sp, #6, mul vl]
+; CHECK-NEXT:    ld1h { z1.h }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #2, mul vl]
+; CHECK-NEXT:    st1h { z3.d }, p1, [x8, #7, mul vl]
 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [sp, #2, mul vl]
 ; CHECK-NEXT:    fadda h2, p0, h2, z1.h
 ; CHECK-NEXT:    fmov s0, s2
@@ -102,14 +100,12 @@
 define half @fadda_nxv12f16(<vscale x 12 x half> %v, half %s) {
 ; CHECK-LABEL: fadda_nxv12f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI5_0
-; CHECK-NEXT:    add x8, x8, :lo12:.LCPI5_0
-; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov w8, #32768
 ; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
-; CHECK-NEXT:    ld1rh { z3.s }, p0/z, [x8]
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    fadda h2, p0, h2, z0.h
+; CHECK-NEXT:    mov z3.h, w8
 ; CHECK-NEXT:    uzp1 z1.h, z1.h, z3.h
 ; CHECK-NEXT:    fadda h2, p0, h2, z1.h
 ; CHECK-NEXT:    fmov s0, s2