diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -974,6 +974,8 @@ setOperationAction(Op, VT, Custom); if (Subtarget->hasFullFP16()) { + setOperationAction(ISD::ConstantFP, MVT::f16, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Custom); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4052,6 +4052,12 @@ def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>; def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>; +// Pattern for FP16 immediates +let Predicates = [HasFullFP16] in { + def : Pat<(f16 fpimm:$in), + (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>; +} + //===----------------------------------------------------------------------===// // Floating point conversion instruction. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -286,6 +286,8 @@ def AArch64fadda_p : PatFrags<(ops node:$op1, node:$op2, node:$op3), [(AArch64fadda_p_node node:$op1, node:$op2, node:$op3), + (AArch64fadda_p_node (SVEAllActive), node:$op2, + (vselect node:$op1, node:$op3, (splat_vector (f16 fpimm_minus0)))), (AArch64fadda_p_node (SVEAllActive), node:$op2, (vselect node:$op1, node:$op3, (splat_vector (f32 fpimm_minus0)))), (AArch64fadda_p_node (SVEAllActive), node:$op2, @@ -718,6 +720,12 @@ (DUP_ZI_D $a, $b)>; // Duplicate immediate FP into all vector elements. 
+ def : Pat<(nxv2f16 (splat_vector (f16 fpimm:$val))), + (DUP_ZR_H (MOVi32imm (bitcast_fpimm_to_i32 f16:$val)))>; + def : Pat<(nxv4f16 (splat_vector (f16 fpimm:$val))), + (DUP_ZR_H (MOVi32imm (bitcast_fpimm_to_i32 f16:$val)))>; + def : Pat<(nxv8f16 (splat_vector (f16 fpimm:$val))), + (DUP_ZR_H (MOVi32imm (bitcast_fpimm_to_i32 f16:$val)))>; def : Pat<(nxv2f32 (splat_vector (f32 fpimm:$val))), (DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>; def : Pat<(nxv4f32 (splat_vector (f32 fpimm:$val))), diff --git a/llvm/test/CodeGen/AArch64/f16-imm.ll b/llvm/test/CodeGen/AArch64/f16-imm.ll --- a/llvm/test/CodeGen/AArch64/f16-imm.ll +++ b/llvm/test/CodeGen/AArch64/f16-imm.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+no-zcz-fp | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-NOZCZ -; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+zcz | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-ZCZ -; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16 +; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+no-zcz-fp | FileCheck %s --check-prefixes=CHECK-FP16,CHECK-NOZCZ +; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+zcz | FileCheck %s --check-prefixes=CHECK-FP16,CHECK-ZCZ +; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK-NOFP16 define half @Const0() { ; CHECK-NOZCZ-LABEL: Const0: @@ -84,31 +84,49 @@ } define half @Const5() { -; CHECK-LABEL: Const5: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: ldr h0, [x8, :lo12:.LCPI5_0] -; CHECK-NEXT: ret +; CHECK-FP16-LABEL: Const5: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: mov w8, #12272 +; CHECK-FP16-NEXT: fmov h0, w8 +; CHECK-FP16-NEXT: ret +; +; CHECK-NOFP16-LABEL: Const5: +; CHECK-NOFP16: // %bb.0: // %entry +; CHECK-NOFP16-NEXT: adrp 
x8, .LCPI5_0 +; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI5_0] +; CHECK-NOFP16-NEXT: ret entry: ret half 0xH2FF0 } define half @Const6() { -; CHECK-LABEL: Const6: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x8, .LCPI6_0 -; CHECK-NEXT: ldr h0, [x8, :lo12:.LCPI6_0] -; CHECK-NEXT: ret +; CHECK-FP16-LABEL: Const6: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: mov w8, #20417 +; CHECK-FP16-NEXT: fmov h0, w8 +; CHECK-FP16-NEXT: ret +; +; CHECK-NOFP16-LABEL: Const6: +; CHECK-NOFP16: // %bb.0: // %entry +; CHECK-NOFP16-NEXT: adrp x8, .LCPI6_0 +; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI6_0] +; CHECK-NOFP16-NEXT: ret entry: ret half 0xH4FC1 } define half @Const7() { -; CHECK-LABEL: Const7: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x8, .LCPI7_0 -; CHECK-NEXT: ldr h0, [x8, :lo12:.LCPI7_0] -; CHECK-NEXT: ret +; CHECK-FP16-LABEL: Const7: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: mov w8, #20480 +; CHECK-FP16-NEXT: fmov h0, w8 +; CHECK-FP16-NEXT: ret +; +; CHECK-NOFP16-LABEL: Const7: +; CHECK-NOFP16: // %bb.0: // %entry +; CHECK-NOFP16-NEXT: adrp x8, .LCPI7_0 +; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI7_0] +; CHECK-NOFP16-NEXT: ret entry: ret half 0xH5000 } diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll --- a/llvm/test/CodeGen/AArch64/isinf.ll +++ b/llvm/test/CodeGen/AArch64/isinf.ll @@ -6,13 +6,13 @@ declare double @llvm.fabs.f64(double) declare fp128 @llvm.fabs.f128(fp128) -; INFINITY requires loading the constant for _Float16 +; Check if INFINITY for _Float16 is materialized define i32 @replace_isinf_call_f16(half %x) { ; CHECK-LABEL: replace_isinf_call_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 +; CHECK-NEXT: mov w8, #31744 ; CHECK-NEXT: fabs h0, h0 -; CHECK-NEXT: ldr h1, [x8, :lo12:.LCPI0_0] +; CHECK-NEXT: fmov h1, w8 ; CHECK-NEXT: fcmp h0, h1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fadda-select.ll 
b/llvm/test/CodeGen/AArch64/sve-fadda-select.ll --- a/llvm/test/CodeGen/AArch64/sve-fadda-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-fadda-select.ll @@ -45,18 +45,11 @@ ret double %fadda } -; Currently the folding doesn't work for f16 element types, since -0.0 is not treated as a legal f16 immediate. - define half @pred_fadda_nxv2f16(half %x, <vscale x 2 x half> %y, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: pred_fadda_nxv2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI3_0 -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 -; CHECK-NEXT: ld1rh { z2.d }, p1/z, [x8] -; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d -; CHECK-NEXT: fadda h0, p1, h0, z1.h +; CHECK-NEXT: fadda h0, p0, h0, z1.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret %i = insertelement <vscale x 2 x half> poison, half -0.000000e+00, i32 0 @@ -69,13 +62,8 @@ define half @pred_fadda_nxv4f16(half %x, <vscale x 4 x half> %y, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: pred_fadda_nxv4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI4_0 -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 -; CHECK-NEXT: ld1rh { z2.s }, p1/z, [x8] -; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s -; CHECK-NEXT: fadda h0, p1, h0, z1.h +; CHECK-NEXT: fadda h0, p0, h0, z1.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret %i = insertelement <vscale x 4 x half> poison, half -0.000000e+00, i32 0 @@ -88,13 +76,8 @@ define half @pred_fadda_nxv8f16(half %x, <vscale x 8 x half> %y, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: pred_fadda_nxv8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI5_0 -; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 -; CHECK-NEXT: ld1rh { z2.h }, p1/z, [x8] -; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h -; CHECK-NEXT: fadda h0, p1, h0, z1.h +; CHECK-NEXT: fadda h0, p0, h0, z1.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret %i = insertelement <vscale x 8 x half> poison, half -0.000000e+00, i32 0 diff --git
a/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll --- a/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll @@ -47,14 +47,13 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI3_0 +; CHECK-NEXT: mov w8, #32768 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: ld1rh { z0.d }, p1/z, [x8] -; CHECK-NEXT: st1h { z0.d }, p1, [sp, #3, mul vl] ; CHECK-NEXT: fmov s0, s1 +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: st1h { z2.d }, p1, [sp, #3, mul vl] ; CHECK-NEXT: ld1h { z2.h }, p0/z, [sp] ; CHECK-NEXT: fadda h0, p0, h0, z2.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 @@ -73,22 +72,21 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG -; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI4_0 +; CHECK-NEXT: mov w8, #32768 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: st1h { z1.h }, p0, [sp] ; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 -; CHECK-NEXT: ld1rh { z1.d }, p1/z, [x8] +; CHECK-NEXT: mov z3.h, w8 ; CHECK-NEXT: addvl x8, sp, #1 +; CHECK-NEXT: st1h { z3.d }, p1, [sp, #1, mul vl] ; CHECK-NEXT: fadda h2, p0, h2, z0.h -; CHECK-NEXT: st1h { z1.d }, p1, [sp, #1, mul vl] -; CHECK-NEXT: ld1h { z3.h }, p0/z, [sp] -; CHECK-NEXT: st1h { z3.h }, p0, [sp, #1, mul vl] -; CHECK-NEXT: st1h { z1.d }, p1, [sp, #6, mul vl] -; CHECK-NEXT: ld1h { z3.h }, p0/z, [sp, #1, mul vl] -; CHECK-NEXT: st1h { z3.h }, p0, [sp, #2, mul vl] -; CHECK-NEXT: st1h { z1.d }, p1, [x8, #7, mul vl] +; CHECK-NEXT: ld1h { z1.h }, p0/z, [sp] +; CHECK-NEXT: st1h { z1.h }, p0, 
[sp, #1, mul vl] +; CHECK-NEXT: st1h { z3.d }, p1, [sp, #6, mul vl] +; CHECK-NEXT: ld1h { z1.h }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: st1h { z1.h }, p0, [sp, #2, mul vl] +; CHECK-NEXT: st1h { z3.d }, p1, [x8, #7, mul vl] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [sp, #2, mul vl] ; CHECK-NEXT: fadda h2, p0, h2, z1.h ; CHECK-NEXT: fmov s0, s2 @@ -102,14 +100,12 @@ define half @fadda_nxv12f16(<vscale x 12 x half> %v, half %s) { ; CHECK-LABEL: fadda_nxv12f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI5_0 -; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov w8, #32768 ; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 ; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: ld1rh { z3.s }, p0/z, [x8] ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: fadda h2, p0, h2, z0.h +; CHECK-NEXT: mov z3.h, w8 ; CHECK-NEXT: uzp1 z1.h, z1.h, z3.h ; CHECK-NEXT: fadda h2, p0, h2, z1.h ; CHECK-NEXT: fmov s0, s2 diff --git a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll --- a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll +++ b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll @@ -508,19 +508,17 @@ define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) { ; CHECK-LABEL: test_signed_v2f16_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI14_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI14_0 +; CHECK-NEXT: mov w8, #64511 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z3.d, #0xffffffff80000000 -; CHECK-NEXT: ld1rh { z1.d }, p0/z, [x8] -; CHECK-NEXT: adrp x8, .LCPI14_1 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI14_1 -; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8] +; CHECK-NEXT: mov z2.d, #0xffffffff80000000 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z1.d, p1/m, z3.d +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h ; CHECK-NEXT: mov z2.d, #0x7fffffff ;
CHECK-NEXT: mov z1.d, p1/m, z2.d @@ -535,23 +533,21 @@ define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) { ; CHECK-LABEL: test_signed_v4f16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI15_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI15_0 +; CHECK-NEXT: mov w8, #64511 +; CHECK-NEXT: mov w9, #-2147483648 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: adrp x9, .LCPI15_1 -; CHECK-NEXT: add x9, x9, :lo12:.LCPI15_1 -; CHECK-NEXT: ld1rh { z1.s }, p0/z, [x8] -; CHECK-NEXT: mov w8, #-2147483648 -; CHECK-NEXT: ld1rh { z2.s }, p0/z, [x9] -; CHECK-NEXT: mov z3.s, w8 -; CHECK-NEXT: mov w8, #2147483647 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov w8, #31743 +; CHECK-NEXT: mov z2.s, w9 +; CHECK-NEXT: mov w9, #2147483647 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z1.s, p1/m, z3.s -; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov z3.h, w8 +; CHECK-NEXT: mov z1.s, p1/m, z2.s +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z3.h +; CHECK-NEXT: mov z2.s, w9 ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h ; CHECK-NEXT: mov z1.s, p1/m, z2.s ; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 @@ -564,31 +560,29 @@ define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) { ; CHECK-LABEL: test_signed_v8f16_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI16_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI16_0 +; CHECK-NEXT: mov w8, #64511 +; CHECK-NEXT: mov w9, #-2147483648 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: adrp x9, .LCPI16_1 -; CHECK-NEXT: add x9, x9, :lo12:.LCPI16_1 ; CHECK-NEXT: uunpklo z2.s, z0.h -; CHECK-NEXT: ld1rh { z1.s }, p0/z, [x8] -; CHECK-NEXT: mov w8, #-2147483648 ; CHECK-NEXT: uunpkhi z6.s, z0.h -; CHECK-NEXT: ld1rh { z3.s }, p0/z, [x9] ; CHECK-NEXT: movprfx z4, z2 ; CHECK-NEXT: fcvtzs z4.s, p0/m, z2.h -; CHECK-NEXT: mov z5.s, w8 -; CHECK-NEXT: mov w8, #2147483647 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov w8, #31743 +; CHECK-NEXT: mov z3.s, w9 +; CHECK-NEXT: mov w9,
#2147483647 ; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, z1.h ; CHECK-NEXT: fcmge p2.h, p0/z, z6.h, z1.h ; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z5.h, w8 ; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z4.s, p1/m, z5.s -; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z3.h -; CHECK-NEXT: mov z7.s, w8 +; CHECK-NEXT: mov z4.s, p1/m, z3.s +; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z5.h +; CHECK-NEXT: mov z7.s, w9 ; CHECK-NEXT: movprfx z0, z6 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z6.h -; CHECK-NEXT: sel z1.s, p2, z5.s, z0.s -; CHECK-NEXT: fcmgt p2.h, p0/z, z6.h, z3.h +; CHECK-NEXT: sel z1.s, p2, z3.s, z0.s +; CHECK-NEXT: fcmgt p2.h, p0/z, z6.h, z5.h ; CHECK-NEXT: sel z0.s, p1, z7.s, z4.s ; CHECK-NEXT: mov z1.s, p2/m, z7.s ; CHECK-NEXT: fcmuo p1.h, p0/z, z2.h, z2.h @@ -603,23 +597,21 @@ define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) { ; CHECK-LABEL: test_signed_v4f16_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI17_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI17_0 +; CHECK-NEXT: mov w8, #63488 +; CHECK-NEXT: mov w9, #30719 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1rh { z1.s }, p0/z, [x8] -; CHECK-NEXT: adrp x8, .LCPI17_1 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI17_1 -; CHECK-NEXT: ld1rh { z2.s }, p0/z, [x8] +; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #32767 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z2.h, w9 ; CHECK-NEXT: mov z1.s, p1/m, #-32768 // =0xffffffffffff8000 ; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h ; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: mov z1.s, p1/m, z2.s ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: mov z1.s, p1/m, z2.s ; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -630,23 +622,21 @@ define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) { ; CHECK-LABEL: test_signed_v8f16_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI18_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI18_0 +; CHECK-NEXT: mov w8, #63488
+; CHECK-NEXT: mov w9, #30719 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: ld1rh { z1.h }, p0/z, [x8] -; CHECK-NEXT: adrp x8, .LCPI18_1 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI18_1 -; CHECK-NEXT: ld1rh { z2.h }, p0/z, [x8] +; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #32767 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.h, p0/m, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z2.h, w9 ; CHECK-NEXT: mov z1.h, p1/m, #-32768 // =0xffffffffffff8000 ; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h ; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: mov z1.h, p1/m, z2.h ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: mov z1.h, p1/m, z2.h ; CHECK-NEXT: mov z1.h, p0/m, #0 // =0x0 ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -657,19 +647,17 @@ define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) { ; CHECK-LABEL: test_signed_v2f16_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI19_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI19_0 +; CHECK-NEXT: mov w8, #64511 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z3.d, #0x8000000000000000 -; CHECK-NEXT: ld1rh { z1.d }, p0/z, [x8] -; CHECK-NEXT: adrp x8, .LCPI19_1 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI19_1 -; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8] +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z1.d, p1/m, z3.d +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h ; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff ; CHECK-NEXT: mov z1.d, p1/m, z2.d @@ -684,32 +672,30 @@ define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) { ; CHECK-LABEL: test_signed_v4f16_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI20_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI20_0 +; CHECK-NEXT: mov w8, #64511 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpklo z4.d, z0.s -;
CHECK-NEXT: mov z3.d, #0x8000000000000000 +; CHECK-NEXT: uunpklo z3.d, z0.s +; CHECK-NEXT: mov w9, #31743 +; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: uunpkhi z5.d, z0.s -; CHECK-NEXT: ld1rh { z1.d }, p0/z, [x8] -; CHECK-NEXT: adrp x8, .LCPI20_1 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI20_1 -; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff -; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8] -; CHECK-NEXT: fcmge p1.h, p0/z, z4.h, z1.h -; CHECK-NEXT: movprfx z0, z4 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.h +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: movprfx z0, z3 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z3.h +; CHECK-NEXT: fcmge p1.h, p0/z, z3.h, z1.h +; CHECK-NEXT: mov z4.h, w9 ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z0.d, p1/m, z3.d +; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z4.h +; CHECK-NEXT: mov z0.d, p1/m, z2.d ; CHECK-NEXT: fcmge p1.h, p0/z, z5.h, z1.h ; CHECK-NEXT: movprfx z1, z5 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z5.h ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p2.h, p0/z, z4.h, z2.h -; CHECK-NEXT: mov z1.d, p1/m, z3.d -; CHECK-NEXT: fcmgt p1.h, p0/z, z5.h, z2.h +; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: fcmgt p1.h, p0/z, z5.h, z4.h ; CHECK-NEXT: mov z0.d, p2/m, z6.d ; CHECK-NEXT: mov z1.d, p1/m, z6.d -; CHECK-NEXT: fcmuo p1.h, p0/z, z4.h, z4.h +; CHECK-NEXT: fcmuo p1.h, p0/z, z3.h, z3.h ; CHECK-NEXT: fcmuo p0.h, p0/z, z5.h, z5.h ; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 diff --git a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll --- a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll +++ b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll @@ -400,18 +400,16 @@ define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) { ; CHECK-LABEL: test_signed_v2f16_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI14_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI14_0 +; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
-; CHECK-NEXT: ld1rh { z1.d }, p0/z, [x8] -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.h -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z0.d, #0xffffffff -; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h +; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0xffffffff +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p2/m, z1.d ; CHECK-NEXT: ret %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f16.nxv2i32(<vscale x 2 x half> %f) ret <vscale x 2 x i32> %x @@ -420,14 +418,13 @@ define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) { ; CHECK-LABEL: test_signed_v4f16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI15_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI15_0 +; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 -; CHECK-NEXT: ld1rh { z2.s }, p0/z, [x8] ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0 ; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h ; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff @@ -440,24 +437,23 @@ define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) { ; CHECK-LABEL: test_signed_v8f16_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI16_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI16_0 +; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: uunpklo z3.s, z0.h -; CHECK-NEXT: uunpkhi z4.s, z0.h -; CHECK-NEXT: fcmge p1.h, p0/z, z3.h, #0.0 -; CHECK-NEXT: ld1rh { z2.s }, p0/z, [x8] -; CHECK-NEXT: fcmge p2.h, p0/z, z4.h, #0.0 -; CHECK-NEXT: movprfx z0, z3 -; CHECK-NEXT: fcvtzu z0.s, p0/m, z3.h -; CHECK-NEXT: movprfx z1, z4 -; CHECK-NEXT: fcvtzu z1.s, p0/m, z4.h -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: uunpklo z1.s, z0.h +; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: fcmge p2.h, p0/z, z1.h, #0.0 +;
CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzu z0.s, p0/m, z1.h +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0 +; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z2.h +; CHECK-NEXT: movprfx z1, z3 +; CHECK-NEXT: fcvtzu z1.s, p0/m, z3.h ; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z2.h ; CHECK-NEXT: mov z1.s, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p1.h, p0/z, z3.h, z2.h -; CHECK-NEXT: fcmgt p0.h, p0/z, z4.h, z2.h ; CHECK-NEXT: mov z0.s, p1/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ret @@ -468,19 +464,17 @@ define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) { ; CHECK-LABEL: test_signed_v4f16_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI17_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI17_0 +; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov w9, #65535 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 -; CHECK-NEXT: ld1rh { z1.s }, p0/z, [x8] -; CHECK-NEXT: mov w8, #65535 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.h -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z0.s, w8 -; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h +; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: mov z1.s, w9 +; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.s, p2/m, z1.s ; CHECK-NEXT: ret %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f16.nxv4i16(<vscale x 4 x half> %f) ret <vscale x 4 x i16> %x @@ -489,14 +483,13 @@ define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) { ; CHECK-LABEL: test_signed_v8f16_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI18_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI18_0 +; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 -;
CHECK-NEXT: ld1rh { z2.h }, p0/z, [x8] ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzu z1.h, p0/m, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: mov z1.h, p1/m, #0 // =0x0 ; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h ; CHECK-NEXT: mov z1.h, p0/m, #-1 // =0xffffffffffffffff @@ -509,14 +502,13 @@ define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) { ; CHECK-LABEL: test_signed_v2f16_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI19_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI19_0 +; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 -; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8] ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 ; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h ; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff @@ -529,24 +521,23 @@ define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) { ; CHECK-LABEL: test_signed_v4f16_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI20_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI20_0 +; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpklo z3.d, z0.s -; CHECK-NEXT: uunpkhi z4.d, z0.s -; CHECK-NEXT: fcmge p1.h, p0/z, z3.h, #0.0 -; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8] -; CHECK-NEXT: fcmge p2.h, p0/z, z4.h, #0.0 -; CHECK-NEXT: movprfx z0, z3 -; CHECK-NEXT: fcvtzu z0.d, p0/m, z3.h -; CHECK-NEXT: movprfx z1, z4 -; CHECK-NEXT: fcvtzu z1.d, p0/m, z4.h -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: uunpkhi z3.d, z0.s +; CHECK-NEXT: fcmge p2.h, p0/z, z1.h, #0.0 +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.h +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0 +; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z2.h +; CHECK-NEXT: movprfx z1, z3 +; CHECK-NEXT: fcvtzu z1.d, p0/m, z3.h ; CHECK-NEXT:
not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z2.h ; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p1.h, p0/z, z3.h, z2.h -; CHECK-NEXT: fcmgt p0.h, p0/z, z4.h, z2.h ; CHECK-NEXT: mov z0.d, p1/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ret