Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1126,6 +1126,8 @@ } if (Subtarget->hasSVE()) { + setOperationAction(ISD::ConstantFP, MVT::f16, Legal); + for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) { setOperationAction(ISD::BITREVERSE, VT, Custom); setOperationAction(ISD::BSWAP, VT, Custom); Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -286,6 +286,8 @@ def AArch64fadda_p : PatFrags<(ops node:$op1, node:$op2, node:$op3), [(AArch64fadda_p_node node:$op1, node:$op2, node:$op3), + (AArch64fadda_p_node (SVEAllActive), node:$op2, + (vselect node:$op1, node:$op3, (splat_vector (f16 fpimm_minus0)))), (AArch64fadda_p_node (SVEAllActive), node:$op2, (vselect node:$op1, node:$op3, (splat_vector (f32 fpimm_minus0)))), (AArch64fadda_p_node (SVEAllActive), node:$op2, @@ -702,6 +704,12 @@ (DUP_ZI_D $a, $b)>; // Duplicate immediate FP into all vector elements. 
+ def : Pat<(nxv2f16 (splat_vector (f16 fpimm:$val))), + (DUP_ZR_H (MOVi32imm (bitcast_fpimm_to_i32 f16:$val)))>; + def : Pat<(nxv4f16 (splat_vector (f16 fpimm:$val))), + (DUP_ZR_H (MOVi32imm (bitcast_fpimm_to_i32 f16:$val)))>; + def : Pat<(nxv8f16 (splat_vector (f16 fpimm:$val))), + (DUP_ZR_H (MOVi32imm (bitcast_fpimm_to_i32 f16:$val)))>; def : Pat<(nxv2f32 (splat_vector (f32 fpimm:$val))), (DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>; def : Pat<(nxv4f32 (splat_vector (f32 fpimm:$val))), Index: llvm/test/CodeGen/AArch64/sve-fadda-select.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fadda-select.ll +++ llvm/test/CodeGen/AArch64/sve-fadda-select.ll @@ -45,18 +45,11 @@ ret double %fadda } -; Currently the folding doesn't work for f16 element types, since -0.0 is not treated as a legal f16 immediate. - define half @pred_fadda_nxv2f16(half %x, <vscale x 2 x half> %y, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: pred_fadda_nxv2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI3_0 -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 -; CHECK-NEXT: ld1rh { z2.d }, p1/z, [x8] -; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d -; CHECK-NEXT: fadda h0, p1, h0, z1.h +; CHECK-NEXT: fadda h0, p0, h0, z1.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret %i = insertelement <vscale x 2 x half> poison, half -0.000000e+00, i32 0 @@ -69,13 +62,8 @@ define half @pred_fadda_nxv4f16(half %x, <vscale x 4 x half> %y, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: pred_fadda_nxv4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI4_0 -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 -; CHECK-NEXT: ld1rh { z2.s }, p1/z, [x8] -; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s -; CHECK-NEXT: fadda h0, p1, h0, z1.h +; CHECK-NEXT: fadda h0, p0, h0, z1.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret %i = insertelement <vscale x 4 x half> poison, half -0.000000e+00, i32 0 @@ -88,13 +76,8 
@@ define half @pred_fadda_nxv8f16(half %x, <vscale x 8 x half> %y, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: pred_fadda_nxv8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI5_0 -; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 -; CHECK-NEXT: ld1rh { z2.h }, p1/z, [x8] -; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h -; CHECK-NEXT: fadda h0, p1, h0, z1.h +; CHECK-NEXT: fadda h0, p0, h0, z1.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret %i = insertelement <vscale x 8 x half> poison, half -0.000000e+00, i32 0 Index: llvm/test/CodeGen/AArch64/sve-fp-reduce.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fp-reduce.ll +++ llvm/test/CodeGen/AArch64/sve-fp-reduce.ll @@ -47,14 +47,13 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI3_0 +; CHECK-NEXT: mov w8, #32768 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: ld1rh { z0.d }, p1/z, [x8] -; CHECK-NEXT: st1h { z0.d }, p1, [sp, #3, mul vl] ; CHECK-NEXT: fmov s0, s1 +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: st1h { z2.d }, p1, [sp, #3, mul vl] ; CHECK-NEXT: ld1h { z2.h }, p0/z, [sp] ; CHECK-NEXT: fadda h0, p0, h0, z2.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 @@ -73,22 +72,21 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG -; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI4_0 +; CHECK-NEXT: mov w8, #32768 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: st1h { z1.h }, p0, [sp] ; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 -; CHECK-NEXT: ld1rh { z1.d }, p1/z, [x8] +; 
CHECK-NEXT: mov z3.h, w8 ; CHECK-NEXT: addvl x8, sp, #1 +; CHECK-NEXT: st1h { z3.d }, p1, [sp, #1, mul vl] ; CHECK-NEXT: fadda h2, p0, h2, z0.h -; CHECK-NEXT: st1h { z1.d }, p1, [sp, #1, mul vl] -; CHECK-NEXT: ld1h { z3.h }, p0/z, [sp] -; CHECK-NEXT: st1h { z3.h }, p0, [sp, #1, mul vl] -; CHECK-NEXT: st1h { z1.d }, p1, [sp, #6, mul vl] -; CHECK-NEXT: ld1h { z3.h }, p0/z, [sp, #1, mul vl] -; CHECK-NEXT: st1h { z3.h }, p0, [sp, #2, mul vl] -; CHECK-NEXT: st1h { z1.d }, p1, [x8, #7, mul vl] +; CHECK-NEXT: ld1h { z1.h }, p0/z, [sp] +; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1h { z3.d }, p1, [sp, #6, mul vl] +; CHECK-NEXT: ld1h { z1.h }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: st1h { z1.h }, p0, [sp, #2, mul vl] +; CHECK-NEXT: st1h { z3.d }, p1, [x8, #7, mul vl] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [sp, #2, mul vl] ; CHECK-NEXT: fadda h2, p0, h2, z1.h ; CHECK-NEXT: fmov s0, s2 @@ -102,14 +100,12 @@ define half @fadda_nxv12f16(<vscale x 12 x half> %v, half %s) { ; CHECK-LABEL: fadda_nxv12f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI5_0 -; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov w8, #32768 ; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 ; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: ld1rh { z3.s }, p0/z, [x8] ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: fadda h2, p0, h2, z0.h +; CHECK-NEXT: mov z3.h, w8 ; CHECK-NEXT: uzp1 z1.h, z1.h, z3.h ; CHECK-NEXT: fadda h2, p0, h2, z1.h ; CHECK-NEXT: fmov s0, s2